From 6ab94e0b318884bbcb95e2ea3835f951502e1d99 Mon Sep 17 00:00:00 2001 From: jaseg Date: Wed, 14 Oct 2020 12:47:28 +0200 Subject: Move firmware into subdirectory --- .../RefLibs/src/FilteringFunctions/biquad.c | 713 +++++++++++++++++++++ .../RefLibs/src/FilteringFunctions/conv.c | 350 ++++++++++ .../RefLibs/src/FilteringFunctions/correlate.c | 513 +++++++++++++++ .../RefLibs/src/FilteringFunctions/fir.c | 325 ++++++++++ .../RefLibs/src/FilteringFunctions/fir_decimate.c | 386 +++++++++++ .../src/FilteringFunctions/fir_interpolate.c | 291 +++++++++ .../RefLibs/src/FilteringFunctions/fir_lattice.c | 241 +++++++ .../RefLibs/src/FilteringFunctions/fir_sparse.c | 485 ++++++++++++++ .../RefLibs/src/FilteringFunctions/iir_lattice.c | 271 ++++++++ .../RefLibs/src/FilteringFunctions/lms.c | 695 ++++++++++++++++++++ 10 files changed, 4270 insertions(+) create mode 100644 fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/biquad.c create mode 100644 fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/conv.c create mode 100644 fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/correlate.c create mode 100644 fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir.c create mode 100644 fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_decimate.c create mode 100644 fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_interpolate.c create mode 100644 fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_lattice.c create mode 100644 fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_sparse.c create mode 100644 fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/iir_lattice.c create mode 100644 fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/lms.c (limited to 'fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions') diff --git a/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/biquad.c b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/biquad.c new file mode 100644 index 0000000..1fe7c54 --- /dev/null +++ b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/biquad.c @@ -0,0 +1,713 @@ +#include "ref.h" + +void ref_biquad_cascade_df2T_f32( + const arm_biquad_cascade_df2T_instance_f32 * S, + float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize) +{ + float32_t *pIn = pSrc; /* source pointer */ + float32_t *pOut = pDst; /* destination pointer */ + float32_t *pState = S->pState; /* State pointer */ + float32_t *pCoeffs = S->pCoeffs; /* coefficient pointer */ + float32_t acc; /* accumulator */ + float32_t b0, b1, b2, a1, a2; /* Filter coefficients */ + float32_t Xn; /* temporary input */ + float32_t d1, d2; /* state variables */ + uint32_t sample, stage = S->numStages; /* loop counters */ + + do + { + /* Reading the coefficients */ + b0 = *pCoeffs++; + b1 = *pCoeffs++; + b2 = *pCoeffs++; + a1 = *pCoeffs++; + a2 = *pCoeffs++; + + /*Reading the state values */ + d1 = pState[0]; + d2 = pState[1]; + + sample = blockSize; + + while (sample > 0U) + { + /* Read the input */ + Xn = *pIn++; + + /* y[n] = b0 * x[n] + d1 */ + acc = (b0 * Xn) + d1; + + /* Store the result in the accumulator in the destination buffer. */ + *pOut++ = acc; + + /* Every time after the output is computed state should be updated. */ + /* d1 = b1 * x[n] + a1 * y[n] + d2 */ + d1 = (b1 * Xn + a1 * acc) + d2; + + /* d2 = b2 * x[n] + a2 * y[n] */ + d2 = (b2 * Xn) + (a2 * acc); + + /* decrement the loop counter */ + sample--; + } + + /* Store the updated state variables back into the state array */ + *pState++ = d1; + *pState++ = d2; + + /* The current stage input is given as the output to the next stage */ + pIn = pDst; + + /*Reset the output working pointer */ + pOut = pDst; + + /* decrement the loop counter */ + stage--; + + } while (stage > 0U); +} + + +void ref_biquad_cascade_stereo_df2T_f32( + const arm_biquad_cascade_stereo_df2T_instance_f32 * S, + float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize) +{ + float32_t *pIn = pSrc; /* source pointer */ + float32_t *pOut = pDst; /* destination pointer */ + float32_t *pState = S->pState; /* State pointer */ + float32_t *pCoeffs = S->pCoeffs; /* coefficient pointer */ + float32_t acc1a, acc1b; /* accumulator */ + float32_t b0, b1, b2, a1, a2; /* Filter coefficients */ + float32_t Xn1a, Xn1b; /* temporary input */ + float32_t d1a, d2a, d1b, d2b; /* state variables */ + uint32_t sample, stage = S->numStages; /* loop counters */ + + do + { + /* Reading the coefficients */ + b0 = *pCoeffs++; + b1 = *pCoeffs++; + b2 = *pCoeffs++; + a1 = *pCoeffs++; + a2 = *pCoeffs++; + + /*Reading the state values */ + d1a = pState[0]; + d2a = pState[1]; + d1b = pState[2]; + d2b = pState[3]; + + sample = blockSize; + + while (sample > 0U) + { + /* Read the input */ + Xn1a = *pIn++; //Channel a + Xn1b = *pIn++; //Channel b + + /* y[n] = b0 * x[n] + d1 */ + acc1a = (b0 * Xn1a) + d1a; + acc1b = (b0 * Xn1b) + d1b; + + /* Store the result in the accumulator in the destination buffer. */ + *pOut++ = acc1a; + *pOut++ = acc1b; + + /* Every time after the output is computed state should be updated. */ + /* d1 = b1 * x[n] + a1 * y[n] + d2 */ + d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a; + d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b; + + /* d2 = b2 * x[n] + a2 * y[n] */ + d2a = (b2 * Xn1a) + (a2 * acc1a); + d2b = (b2 * Xn1b) + (a2 * acc1b); + + /* decrement the loop counter */ + sample--; + } + + /* Store the updated state variables back into the state array */ + *pState++ = d1a; + *pState++ = d2a; + *pState++ = d1b; + *pState++ = d2b; + + /* The current stage input is given as the output to the next stage */ + pIn = pDst; + + /*Reset the output working pointer */ + pOut = pDst; + + /* decrement the loop counter */ + stage--; + + } while (stage > 0U); + +} + +void ref_biquad_cascade_df2T_f64( + const arm_biquad_cascade_df2T_instance_f64 * S, + float64_t * pSrc, + float64_t * pDst, + uint32_t blockSize) +{ + float64_t *pIn = pSrc; /* source pointer */ + float64_t *pOut = pDst; /* destination pointer */ + float64_t *pState = S->pState; /* State pointer */ + float64_t *pCoeffs = S->pCoeffs; /* coefficient pointer */ + float64_t acc; /* accumulator */ + float64_t b0, b1, b2, a1, a2; /* Filter coefficients */ + float64_t Xn; /* temporary input */ + float64_t d1, d2; /* state variables */ + uint32_t sample, stage = S->numStages; /* loop counters */ + + do + { + /* Reading the coefficients */ + b0 = *pCoeffs++; + b1 = *pCoeffs++; + b2 = *pCoeffs++; + a1 = *pCoeffs++; + a2 = *pCoeffs++; + + /*Reading the state values */ + d1 = pState[0]; + d2 = pState[1]; + + sample = blockSize; + + while (sample > 0U) + { + /* Read the input */ + Xn = *pIn++; + + /* y[n] = b0 * x[n] + d1 */ + acc = (b0 * Xn) + d1; + + /* Store the result in the accumulator in the destination buffer. */ + *pOut++ = acc; + + /* Every time after the output is computed state should be updated. */ + /* d1 = b1 * x[n] + a1 * y[n] + d2 */ + d1 = (b1 * Xn + a1 * acc) + d2; + + /* d2 = b2 * x[n] + a2 * y[n] */ + d2 = (b2 * Xn) + (a2 * acc); + + /* decrement the loop counter */ + sample--; + } + + /* Store the updated state variables back into the state array */ + *pState++ = d1; + *pState++ = d2; + + /* The current stage input is given as the output to the next stage */ + pIn = pDst; + + /*Reset the output working pointer */ + pOut = pDst; + + /* decrement the loop counter */ + stage--; + + } while (stage > 0U); +} + +void ref_biquad_cascade_df1_f32( + const arm_biquad_casd_df1_inst_f32 * S, + float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize) +{ + float32_t *pIn = pSrc; /* source pointer */ + float32_t *pOut = pDst; /* destination pointer */ + float32_t *pState = S->pState; /* pState pointer */ + float32_t *pCoeffs = S->pCoeffs; /* coefficient pointer */ + float32_t acc; /* Simulates the accumulator */ + float32_t b0, b1, b2, a1, a2; /* Filter coefficients */ + float32_t Xn1, Xn2, Yn1, Yn2; /* Filter pState variables */ + float32_t Xn; /* temporary input */ + uint32_t sample, stage = S->numStages; /* loop counters */ + + do + { + /* Reading the coefficients */ + b0 = *pCoeffs++; + b1 = *pCoeffs++; + b2 = *pCoeffs++; + a1 = *pCoeffs++; + a2 = *pCoeffs++; + + /* Reading the pState values */ + Xn1 = pState[0]; + Xn2 = pState[1]; + Yn1 = pState[2]; + Yn2 = pState[3]; + + /* The variables acc holds the output value that is computed: + * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] + */ + + sample = blockSize; + + while (sample > 0U) + { + /* Read the input */ + Xn = *pIn++; + + /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ + acc = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2); + + /* Store the result in the accumulator in the destination buffer. */ + *pOut++ = acc; + + /* Every time after the output is computed state should be updated. */ + /* The states should be updated as: */ + /* Xn2 = Xn1 */ + /* Xn1 = Xn */ + /* Yn2 = Yn1 */ + /* Yn1 = acc */ + Xn2 = Xn1; + Xn1 = Xn; + Yn2 = Yn1; + Yn1 = acc; + + /* decrement the loop counter */ + sample--; + } + + /* Store the updated state variables back into the pState array */ + *pState++ = Xn1; + *pState++ = Xn2; + *pState++ = Yn1; + *pState++ = Yn2; + + /* The first stage goes from the input buffer to the output buffer. */ + /* Subsequent numStages occur in-place in the output buffer */ + pIn = pDst; + + /* Reset the output pointer */ + pOut = pDst; + + /* decrement the loop counter */ + stage--; + + } while (stage > 0U); +} + +void ref_biquad_cas_df1_32x64_q31( + const arm_biquad_cas_df1_32x64_ins_q31 * S, + q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize) +{ + q31_t *pIn = pSrc; /* input pointer initialization */ + q31_t *pOut = pDst; /* output pointer initialization */ + q63_t *pState = S->pState; /* state pointer initialization */ + q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */ + q63_t acc; /* accumulator */ + q31_t Xn1, Xn2; /* Input Filter state variables */ + q63_t Yn1, Yn2; /* Output Filter state variables */ + q31_t b0, b1, b2, a1, a2; /* Filter coefficients */ + q31_t Xn; /* temporary input */ + int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */ + uint32_t sample, stage = S->numStages; /* loop counters */ + q31_t acc_l, acc_h; /* temporary output */ + uint32_t uShift = ((uint32_t) S->postShift + 1U); + uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */ + + do + { + /* Reading the coefficients */ + b0 = *pCoeffs++; + b1 = *pCoeffs++; + b2 = *pCoeffs++; + a1 = *pCoeffs++; + a2 = *pCoeffs++; + + /* Reading the state values */ + Xn1 = pState[0]; + Xn2 = pState[1]; + Yn1 = pState[2]; + Yn2 = pState[3]; + + sample = blockSize; + + while (sample > 0U) + { + /* Read the input */ + Xn = *pIn++; + + /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ + acc = (q63_t)Xn*b0 + (q63_t)Xn1*b1 + (q63_t)Xn2*b2; + /* acc += a1 * y[n-1] */ + acc += mult32x64(Yn1, a1); + /* acc += a2 * y[n-2] */ + acc += mult32x64(Yn2, a2); + + /* Every time after the output is computed state should be updated. */ + Xn2 = Xn1; + Xn1 = Xn; + Yn2 = Yn1; + + /* The result is converted to 1.63, Yn1 variable is reused */ + Yn1 = acc << shift; + + /* Calc lower part of acc */ + acc_l = acc & 0xffffffff; + + /* Calc upper part of acc */ + acc_h = (acc >> 32) & 0xffffffff; + + /* Apply shift for lower part of acc and upper part of acc */ + acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift; + + /* Store the output in the destination buffer in 1.31 format. */ + *pOut++ = acc_h; + + /* decrement the loop counter */ + sample--; + } + + /* The first stage output is given as input to the second stage. */ + pIn = pDst; + + /* Reset to destination buffer working pointer */ + pOut = pDst; + + /* Store the updated state variables back into the pState array */ + *pState++ = (q63_t) Xn1; + *pState++ = (q63_t) Xn2; + *pState++ = Yn1; + *pState++ = Yn2; + + } while (--stage); +} + +void ref_biquad_cascade_df1_q31( + const arm_biquad_casd_df1_inst_q31 * S, + q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize) +{ + q63_t acc; /* accumulator */ + uint32_t uShift = ((uint32_t) S->postShift + 1U); + uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */ + q31_t *pIn = pSrc; /* input pointer initialization */ + q31_t *pOut = pDst; /* output pointer initialization */ + q31_t *pState = S->pState; /* pState pointer initialization */ + q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */ + q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */ + q31_t b0, b1, b2, a1, a2; /* Filter coefficients */ + q31_t Xn; /* temporary input */ + uint32_t sample, stage = S->numStages; /* loop counters */ + + do + { + /* Reading the coefficients */ + b0 = *pCoeffs++; + b1 = *pCoeffs++; + b2 = *pCoeffs++; + a1 = *pCoeffs++; + a2 = *pCoeffs++; + + /* Reading the state values */ + Xn1 = pState[0]; + Xn2 = pState[1]; + Yn1 = pState[2]; + Yn2 = pState[3]; + + /* The variables acc holds the output value that is computed: + * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] + */ + + sample = blockSize; + + while (sample > 0U) + { + /* Read the input */ + Xn = *pIn++; + + /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ + /* acc = b0 * x[n] */ + acc = (q63_t) b0 *Xn; + + /* acc += b1 * x[n-1] */ + acc += (q63_t) b1 *Xn1; + /* acc += b[2] * x[n-2] */ + acc += (q63_t) b2 *Xn2; + /* acc += a1 * y[n-1] */ + acc += (q63_t) a1 *Yn1; + /* acc += a2 * y[n-2] */ + acc += (q63_t) a2 *Yn2; + + /* The result is converted to 1.31 */ + acc = acc >> lShift; + + /* Every time after the output is computed state should be updated. */ + /* The states should be updated as: */ + /* Xn2 = Xn1 */ + /* Xn1 = Xn */ + /* Yn2 = Yn1 */ + /* Yn1 = acc */ + Xn2 = Xn1; + Xn1 = Xn; + Yn2 = Yn1; + Yn1 = (q31_t) acc; + + /* Store the output in the destination buffer. */ + *pOut++ = (q31_t) acc; + + /* decrement the loop counter */ + sample--; + } + + /* The first stage goes from the input buffer to the output buffer. */ + /* Subsequent stages occur in-place in the output buffer */ + pIn = pDst; + + /* Reset to destination pointer */ + pOut = pDst; + + /* Store the updated state variables back into the pState array */ + *pState++ = Xn1; + *pState++ = Xn2; + *pState++ = Yn1; + *pState++ = Yn2; + + } while (--stage); +} + + +void ref_biquad_cascade_df1_fast_q31( + const arm_biquad_casd_df1_inst_q31 * S, + q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize) +{ + q31_t acc = 0; /* accumulator */ + q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */ + q31_t b0, b1, b2, a1, a2; /* Filter coefficients */ + q31_t *pIn = pSrc; /* input pointer initialization */ + q31_t *pOut = pDst; /* output pointer initialization */ + q31_t *pState = S->pState; /* pState pointer initialization */ + q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */ + q31_t Xn; /* temporary input */ + int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */ + uint32_t sample, stage = S->numStages; /* loop counters */ + + do + { + /* Reading the coefficients */ + b0 = *pCoeffs++; + b1 = *pCoeffs++; + b2 = *pCoeffs++; + a1 = *pCoeffs++; + a2 = *pCoeffs++; + + /* Reading the state values */ + Xn1 = pState[0]; + Xn2 = pState[1]; + Yn1 = pState[2]; + Yn2 = pState[3]; + + sample = blockSize; + + while (sample > 0U) + { + /* Read the input */ + Xn = *pIn++; + + /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ + mult_32x32_keep32_R(acc, b0, Xn); + multAcc_32x32_keep32_R(acc, b1, Xn1); + multAcc_32x32_keep32_R(acc, b2, Xn2); + multAcc_32x32_keep32_R(acc, a1, Yn1); + multAcc_32x32_keep32_R(acc, a2, Yn2); + + /* The result is converted to 1.31 */ + acc <<= shift; + + /* Every time after the output is computed state should be updated. */ + Xn2 = Xn1; + Xn1 = Xn; + Yn2 = Yn1; + Yn1 = acc; + + /* Store the output in the destination buffer. */ + *pOut++ = acc; + + /* decrement the loop counter */ + sample--; + } + + /* The first stage goes from the input buffer to the output buffer. */ + /* Subsequent stages occur in-place in the output buffer */ + pIn = pDst; + + /* Reset to destination pointer */ + pOut = pDst; + + /* Store the updated state variables back into the pState array */ + *pState++ = Xn1; + *pState++ = Xn2; + *pState++ = Yn1; + *pState++ = Yn2; + + } while (--stage); +} + +void ref_biquad_cascade_df1_fast_q15( + const arm_biquad_casd_df1_inst_q15 * S, + q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize) +{ + q15_t *pIn = pSrc; /* Source pointer */ + q15_t *pOut = pDst; /* Destination pointer */ + q15_t b0, b1, b2, a1, a2; /* Filter coefficients */ + q15_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */ + q15_t Xn; /* temporary input */ + q31_t acc; /* Accumulator */ + int32_t shift = (15 - (int32_t) S->postShift); /* Post shift */ + q15_t *pState = S->pState; /* State pointer */ + q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */ + + do + { + /* Reading the coefficients */ + b0 = *pCoeffs++; + pCoeffs++; // skip the 0 coefficient + b1 = *pCoeffs++; + b2 = *pCoeffs++; + a1 = *pCoeffs++; + a2 = *pCoeffs++; + + /* Reading the state values */ + Xn1 = pState[0]; + Xn2 = pState[1]; + Yn1 = pState[2]; + Yn2 = pState[3]; + + sample = blockSize; + + while (sample > 0U) + { + /* Read the input */ + Xn = *pIn++; + + /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ + acc = (q31_t)b0*Xn + (q31_t)b1*Xn1 + (q31_t)b2*Xn2 + (q31_t)a1*Yn1 + (q31_t)a2*Yn2; + + /* The result is converted to 1.15 */ + acc = ref_sat_q15(acc >> shift); + + /* Every time after the output is computed state should be updated. */ + Xn2 = Xn1; + Xn1 = Xn; + Yn2 = Yn1; + Yn1 = (q15_t) acc; + + /* Store the output in the destination buffer. */ + *pOut++ = (q15_t) acc; + + /* decrement the loop counter */ + sample--; + } + + /* The first stage goes from the input buffer to the output buffer. */ + /* Subsequent stages occur in-place in the output buffer */ + pIn = pDst; + + /* Reset to destination pointer */ + pOut = pDst; + + /* Store the updated state variables back into the pState array */ + *pState++ = Xn1; + *pState++ = Xn2; + *pState++ = Yn1; + *pState++ = Yn2; + + } while (--stage); +} + +void ref_biquad_cascade_df1_q15( + const arm_biquad_casd_df1_inst_q15 * S, + q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize) +{ + q15_t *pIn = pSrc; /* Source pointer */ + q15_t *pOut = pDst; /* Destination pointer */ + q15_t b0, b1, b2, a1, a2; /* Filter coefficients */ + q15_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */ + q15_t Xn; /* temporary input */ + q63_t acc; /* Accumulator */ + int32_t shift = (15 - (int32_t) S->postShift); /* Post shift */ + q15_t *pState = S->pState; /* State pointer */ + q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */ + + do + { + /* Reading the coefficients */ + b0 = *pCoeffs++; + pCoeffs++; // skip the 0 coefficient + b1 = *pCoeffs++; + b2 = *pCoeffs++; + a1 = *pCoeffs++; + a2 = *pCoeffs++; + + /* Reading the state values */ + Xn1 = pState[0]; + Xn2 = pState[1]; + Yn1 = pState[2]; + Yn2 = pState[3]; + + sample = blockSize; + + while (sample > 0U) + { + /* Read the input */ + Xn = *pIn++; + + /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ + acc = (q31_t)b0*Xn + (q31_t)b1*Xn1 + (q31_t)b2*Xn2 + (q31_t)a1*Yn1 + (q31_t)a2*Yn2; + + /* The result is converted to 1.15 */ + acc = ref_sat_q15(acc >> shift); + + /* Every time after the output is computed state should be updated. */ + Xn2 = Xn1; + Xn1 = Xn; + Yn2 = Yn1; + Yn1 = (q15_t) acc; + + /* Store the output in the destination buffer. */ + *pOut++ = (q15_t) acc; + + /* decrement the loop counter */ + sample--; + } + + /* The first stage goes from the input buffer to the output buffer. */ + /* Subsequent stages occur in-place in the output buffer */ + pIn = pDst; + + /* Reset to destination pointer */ + pOut = pDst; + + /* Store the updated state variables back into the pState array */ + *pState++ = Xn1; + *pState++ = Xn2; + *pState++ = Yn1; + *pState++ = Yn2; + + } while (--stage); +} diff --git a/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/conv.c b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/conv.c new file mode 100644 index 0000000..dc1b103 --- /dev/null +++ b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/conv.c @@ -0,0 +1,350 @@ +#include "ref.h" + +void ref_conv_f32( + float32_t * pSrcA, + uint32_t srcALen, + float32_t * pSrcB, + uint32_t srcBLen, + float32_t * pDst) +{ + float32_t sum; /* Accumulator */ + uint32_t i, j; /* loop counters */ + + /* Loop to calculate convolution for output length number of times */ + for (i = 0; i < srcALen + srcBLen - 1; i++) + { + /* Initialize sum with zero to carry out MAC operations */ + sum = 0.0f; + + /* Loop to perform MAC operations according to convolution equation */ + for (j = 0; j <= i; j++) + { + /* Check the array limitations */ + if ((i - j < srcBLen) && (j < srcALen)) + { + /* z[i] += x[i-j] * y[j] */ + sum += pSrcB[i - j] * pSrcA[j]; + } + } + /* Store the output in the destination buffer */ + pDst[i] = sum; + } +} + +arm_status ref_conv_partial_f32( + float32_t * pSrcA, + uint32_t srcALen, + float32_t * pSrcB, + uint32_t srcBLen, + float32_t * pDst, + uint32_t firstIndex, + uint32_t numPoints) +{ + ref_conv_f32(pSrcA,srcALen,pSrcB,srcBLen,pDst); + + return ARM_MATH_SUCCESS; +} + +void ref_conv_q31( + q31_t * pSrcA, + uint32_t srcALen, + q31_t * pSrcB, + uint32_t srcBLen, + q31_t * pDst) +{ + q63_t sum; /* Accumulator */ + uint32_t i, j; /* loop counter */ + + /* Loop to calculate output of convolution for output length number of times */ + for (i = 0; i < srcALen + srcBLen - 1; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0; + + /* Loop to perform MAC operations according to convolution equation */ + for (j = 0; j <= i; j++) + { + /* Check the array limitations */ + if ((i - j < srcBLen) && (j < srcALen)) + { + /* z[i] += x[i-j] * y[j] */ + sum += (q63_t) pSrcA[j] * (pSrcB[i - j]); + } + } + + /* Store the output in the destination buffer */ + pDst[i] = (q31_t)(sum >> 31U); + } +} + +void ref_conv_fast_q31( + q31_t * pSrcA, + uint32_t srcALen, + q31_t * pSrcB, + uint32_t srcBLen, + q31_t * pDst) +{ + q31_t sum; /* Accumulator */ + uint32_t i, j; /* loop counter */ + + /* Loop to calculate output of convolution for output length number of times */ + for (i = 0; i < srcALen + srcBLen - 1; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0; + + /* Loop to perform MAC operations according to convolution equation */ + for (j = 0; j <= i; j++) + { + /* Check the array limitations */ + if ((i - j < srcBLen) && (j < srcALen)) + { + /* z[i] += x[i-j] * y[j] */ + sum = (q31_t) ((((q63_t)sum << 32) + + ((q63_t)pSrcA[j] * pSrcB[i - j])) >> 32); + } + } + + /* Store the output in the destination buffer */ + pDst[i] = (q31_t)(sum << 1U); + } +} + +arm_status ref_conv_partial_q31( + q31_t * pSrcA, + uint32_t srcALen, + q31_t * pSrcB, + uint32_t srcBLen, + q31_t * pDst, + uint32_t firstIndex, + uint32_t numPoints) +{ + ref_conv_q31(pSrcA,srcALen,pSrcB,srcBLen,pDst); + + return ARM_MATH_SUCCESS; +} + +arm_status ref_conv_partial_fast_q31( + q31_t * pSrcA, + uint32_t srcALen, + q31_t * pSrcB, + uint32_t srcBLen, + q31_t * pDst, + uint32_t firstIndex, + uint32_t numPoints) +{ + ref_conv_fast_q31(pSrcA,srcALen,pSrcB,srcBLen,pDst); + + return ARM_MATH_SUCCESS; +} + +void ref_conv_q15( + q15_t * pSrcA, + uint32_t srcALen, + q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst) +{ + q63_t sum; /* Accumulator */ + uint32_t i, j; /* loop counter */ + + /* Loop to calculate output of convolution for output length number of times */ + for (i = 0; i < srcALen + srcBLen - 1; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0; + + /* Loop to perform MAC operations according to convolution equation */ + for (j = 0; j <= i; j++) + { + /* Check the array limitations */ + if ((i - j < srcBLen) && (j < srcALen)) + { + /* z[i] += x[i-j] * y[j] */ + sum += (q31_t)pSrcA[j] * pSrcB[i - j]; + } + } + + /* Store the output in the destination buffer */ + pDst[i] = ref_sat_q15(sum >> 15U); + } +} + +arm_status ref_conv_partial_fast_opt_q15( + q15_t * pSrcA, + uint32_t srcALen, + q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst, + uint32_t firstIndex, + uint32_t numPoints, + q15_t * pScratch1, + q15_t * pScratch2) +{ + q31_t sum; /* Accumulator */ + uint32_t i, j; /* loop counter */ + + /* Loop to calculate output of convolution for output length number of times */ + for (i = 0; i < srcALen + srcBLen - 1; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0; + + /* Loop to perform MAC operations according to convolution equation */ + for (j = 0; j <= i; j++) + { + /* Check the array limitations */ + if ((i - j < srcBLen) && (j < srcALen)) + { + /* z[i] += x[i-j] * y[j] */ + sum += (q31_t)pSrcA[j] * pSrcB[i - j]; + } + } + + /* Store the output in the destination buffer */ + pDst[i] = ref_sat_q15(sum >> 15U); + } + + return ARM_MATH_SUCCESS; +} + +void ref_conv_fast_q15( + q15_t * pSrcA, + uint32_t srcALen, + q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst) +{ + q31_t sum; /* Accumulator */ + uint32_t i, j; /* loop counter */ + + /* Loop to calculate output of convolution for output length number of times */ + for (i = 0; i < srcALen + srcBLen - 1; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0; + + /* Loop to perform MAC operations according to convolution equation */ + for (j = 0; j <= i; j++) + { + /* Check the array limitations */ + if ((i - j < srcBLen) && (j < srcALen)) + { + /* z[i] += x[i-j] * y[j] */ + sum += (q31_t)pSrcA[j] * pSrcB[i - j]; + } + } + + /* Store the output in the destination buffer */ + pDst[i] = sum >> 15U; + } +} + +void ref_conv_fast_opt_q15( + q15_t * pSrcA, + uint32_t srcALen, + q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst, + q15_t * pScratch1, + q15_t * pScratch2) +{ + q31_t sum; /* Accumulator */ + uint32_t i, j; /* loop counter */ + + /* Loop to calculate output of convolution for output length number of times */ + for (i = 0; i < srcALen + srcBLen - 1; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0; + + /* Loop to perform MAC operations according to convolution equation */ + for (j = 0; j <= i; j++) + { + /* Check the array limitations */ + if ((i - j < srcBLen) && (j < srcALen)) + { + /* z[i] += x[i-j] * y[j] */ + sum += (q31_t)pSrcA[j] * pSrcB[i - j]; + } + } + + /* Store the output in the destination buffer */ + pDst[i] = ref_sat_q15(sum >> 15U); + } +} + +arm_status ref_conv_partial_q15( + q15_t * pSrcA, + uint32_t srcALen, + q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst, + uint32_t firstIndex, + uint32_t numPoints) +{ + ref_conv_q15(pSrcA,srcALen,pSrcB,srcBLen,pDst); + + return ARM_MATH_SUCCESS; +} + +arm_status ref_conv_partial_fast_q15( + q15_t * pSrcA, + uint32_t srcALen, + q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst, + uint32_t firstIndex, + uint32_t numPoints) +{ + ref_conv_fast_q15(pSrcA,srcALen,pSrcB,srcBLen,pDst); + + return ARM_MATH_SUCCESS; +} + + +void ref_conv_q7( + q7_t * pSrcA, + uint32_t srcALen, + q7_t * pSrcB, + uint32_t srcBLen, + q7_t * pDst) +{ + q31_t sum; /* Accumulator */ + uint32_t i, j; /* loop counter */ + + /* Loop to calculate output of convolution for output length number of times */ + for (i = 0; i < srcALen + srcBLen - 1; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0; + + /* Loop to perform MAC operations according to convolution equation */ + for (j = 0; j <= i; j++) + { + /* Check the array limitations */ + if ((i - j < srcBLen) && (j < srcALen)) + { + /* z[i] += x[i-j] * y[j] */ + sum += (q15_t)pSrcA[j] * pSrcB[i - j]; + } + } + + /* Store the output in the destination buffer */ + pDst[i] = (q7_t)ref_sat_q7(sum >> 7); + } +} + +arm_status ref_conv_partial_q7( + q7_t * pSrcA, + uint32_t srcALen, + q7_t * pSrcB, + uint32_t srcBLen, + q7_t * pDst, + uint32_t firstIndex, + uint32_t numPoints) +{ + ref_conv_q7(pSrcA,srcALen,pSrcB,srcBLen,pDst); + + return ARM_MATH_SUCCESS; +} diff --git a/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/correlate.c b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/correlate.c new file mode 100644 index 0000000..ff1d95b --- /dev/null +++ b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/correlate.c @@ -0,0 +1,513 @@ +#include "ref.h" + +void ref_correlate_f32( + float32_t * pSrcA, + uint32_t srcALen, + float32_t * pSrcB, + uint32_t srcBLen, + float32_t * pDst) +{ + float32_t *pIn1 = pSrcA; /* inputA pointer */ + float32_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */ + float32_t sum; /* Accumulator */ + uint32_t i = 0U, j; /* loop counters */ + uint32_t inv = 0U; /* Reverse order flag */ + uint32_t tot = 0U; /* Length */ + + /* The algorithm implementation is based on the lengths of the inputs. + * srcB is always made to slide across srcA. + * So srcBLen is always considered as shorter or equal to srcALen + * But CORR(x, y) is reverse of CORR(y, x) + * So, when srcBLen > srcALen, output pointer is made to point to the end of the output buffer + * and a variable, inv is set to 1 + * If lengths are not equal then zero pad has to be done to make the two + * inputs of same length. But to improve the performance, we include zeroes + * in the output instead of zero padding either of the the inputs + * If srcALen > srcBLen, (srcALen - srcBLen) zeroes has to included in the + * starting of the output buffer + * If srcALen < srcBLen, (srcALen - srcBLen) zeroes has to included in the + * ending of the output buffer + * Once the zero padding is done the remaining of the output is calcualted + * using convolution but with the shorter signal time shifted. + */ + + /* Calculate the length of the remaining sequence */ + tot = srcALen + srcBLen - 2U; + + if (srcALen > srcBLen) + { + /* Calculating the number of zeros to be padded to the output */ + /* Initialise the pointer after zero padding */ + pDst += srcALen - srcBLen; + } + else if (srcALen < srcBLen) + { + /* Initialization to inputB pointer */ + pIn1 = pSrcB; + + /* Initialization to the end of inputA pointer */ + pIn2 = pSrcA + srcALen - 1U; + + /* Initialisation of the pointer after zero padding */ + pDst += tot; + + /* Swapping the lengths */ + j = srcALen; + srcALen = srcBLen; + srcBLen = j; + + /* Setting the reverse flag */ + inv = 1; + } + + /* Loop to calculate convolution for output length number of times */ + for (i = 0U; i <= tot; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0.0f; + + /* Loop to perform MAC operations according to convolution equation */ + for (j = 0U; j <= i; j++) + { + /* Check the array limitations */ + if ((i - j < srcBLen) && (j < srcALen)) + { + /* z[i] += x[i-j] * y[j] */ + sum += pIn1[j] * pIn2[-((int32_t)i - j)]; + } + } + /* Store the output in the destination buffer */ + if (inv == 1) + *pDst-- = sum; + else + *pDst++ = sum; + } +} + +void ref_correlate_q31( + q31_t * pSrcA, + uint32_t srcALen, + q31_t * pSrcB, + uint32_t srcBLen, + q31_t * pDst) +{ + q31_t *pIn1 = pSrcA; /* inputA pointer */ + q31_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */ + q63_t sum; /* Accumulators */ + uint32_t i = 0U, j; /* loop counters */ + uint32_t inv = 0U; /* Reverse order flag */ + uint32_t tot = 0U; /* Length */ + + /* Calculate the length of the remaining sequence */ + tot = ((srcALen + srcBLen) - 2U); + + if (srcALen > srcBLen) + { + /* Calculating the number of zeros to be padded to the output */ + j = srcALen - srcBLen; + + /* Initialise the pointer after zero padding */ + pDst += j; + } + + else if (srcALen < srcBLen) + { + /* Initialization to inputB pointer */ + pIn1 = pSrcB; + + /* Initialization to the end of inputA pointer */ + pIn2 = pSrcA + (srcALen - 1U); + + /* Initialisation of the pointer after zero padding */ + pDst = pDst + tot; + + /* Swapping the lengths */ + j = srcALen; + srcALen = srcBLen; + srcBLen = j; + + /* Setting the reverse flag */ + inv = 1; + + } + + /* Loop to calculate correlation for output length number of times */ + for (i = 0U; i <= tot; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0; + + /* Loop to perform MAC operations according to correlation equation */ + for (j = 0U; j <= i; j++) + { + /* Check the array limitations */ + if ((((i - j) < srcBLen) && (j < srcALen))) + { + /* z[i] += x[i-j] * y[j] */ + sum += ((q63_t) pIn1[j] * pIn2[-((int32_t) i - j)]); + } + } + /* Store the output in the destination buffer */ + if (inv == 1) + *pDst-- = (q31_t)(sum >> 31U); + else + *pDst++ = (q31_t)(sum >> 31U); + } +} + +void ref_correlate_fast_q31( + q31_t * pSrcA, + uint32_t srcALen, + q31_t * pSrcB, + uint32_t srcBLen, + q31_t * pDst) +{ + q31_t *pIn1 = pSrcA; /* inputA pointer */ + q31_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */ + q63_t sum; /* Accumulators */ + uint32_t i = 0U, j; /* loop counters */ + uint32_t inv = 0U; /* Reverse order flag */ + uint32_t tot = 0U; /* Length */ + + /* Calculate the length of the remaining sequence */ + tot = ((srcALen + srcBLen) - 2U); + + if (srcALen > srcBLen) + { + /* Calculating the number of zeros to be padded to the output */ + j = srcALen - srcBLen; + + /* Initialise the pointer after zero padding */ + pDst += j; + } + + else if (srcALen < srcBLen) + { + /* Initialization to inputB pointer */ + pIn1 = pSrcB; + + /* Initialization to the end of inputA pointer */ + pIn2 = pSrcA + (srcALen - 1U); + + /* Initialisation of the pointer after zero padding */ + pDst = pDst + tot; + + /* Swapping the lengths */ + j = srcALen; + srcALen = srcBLen; + srcBLen = j; + + /* Setting the reverse flag */ + inv = 1; + + } + + /* Loop to calculate correlation for output length number of times */ + for (i = 0U; i <= tot; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0; + + /* Loop to perform MAC operations according to correlation equation */ + for (j = 0U; j <= i; j++) + { + /* Check the array limitations */ + if ((((i - j) < srcBLen) && (j < srcALen))) + { + /* z[i] += x[i-j] * y[j] */ + sum = (q31_t) ((((q63_t) sum << 32) + + ((q63_t) pIn1[j] * pIn2[-((int32_t) i - j)])) >> 32); + } + } + /* Store the output in the destination buffer */ + if (inv == 1) + *pDst-- = (q31_t)(sum << 1U); + else + *pDst++ = (q31_t)(sum << 1U); + } +} + +void ref_correlate_q15( + q15_t * pSrcA, + uint32_t srcALen, + q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst) +{ + q15_t *pIn1 = pSrcA; /* inputA pointer */ + q15_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */ + q63_t sum; /* Accumulators */ + uint32_t i = 0U, j; /* loop counters */ + uint32_t inv = 0U; /* Reverse order flag */ + uint32_t tot = 0U; /* Length */ + + /* Calculate the length of the remaining sequence */ + tot = ((srcALen + srcBLen) - 2U); + + if (srcALen > srcBLen) + { + /* Calculating the number of zeros to be padded to the output */ + j = srcALen - srcBLen; + + /* Initialise the pointer after zero padding */ + pDst += j; + } + + else if (srcALen < srcBLen) + { + /* Initialization to inputB pointer */ + pIn1 = pSrcB; + + /* Initialization to the end of inputA pointer */ + pIn2 = pSrcA + (srcALen - 1U); + + /* Initialisation of the pointer after zero padding */ + pDst = pDst + tot; + + /* Swapping the lengths */ + j = srcALen; + srcALen = srcBLen; + srcBLen = j; + + /* Setting the reverse flag */ + inv = 1; + + } + + /* Loop to calculate convolution for output length number of times */ + for (i = 0U; i <= tot; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0; + + /* Loop to perform MAC operations according to convolution equation */ + for (j = 0U; j <= i; j++) + { + /* Check the array limitations */ + if ((((i - j) < srcBLen) && (j < srcALen))) + { + /* z[i] += x[i-j] * y[j] */ + sum += ((q31_t) pIn1[j] * pIn2[-((int32_t) i - j)]); + } + } + /* Store the output in the destination buffer */ + if (inv == 1) + *pDst-- = (q15_t) ref_sat_q15(sum >> 15U); + else + *pDst++ = (q15_t) ref_sat_q15(sum >> 15U); + } +} + +void ref_correlate_fast_q15( + q15_t * pSrcA, + uint32_t srcALen, + q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst) +{ + q15_t *pIn1 = pSrcA; /* inputA pointer */ + q15_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */ + q63_t sum; /* Accumulators */ + uint32_t i = 0U, j; /* loop counters */ + uint32_t inv = 0U; /* Reverse order flag */ + uint32_t tot = 0U; /* Length */ + + /* Calculate the length of the remaining sequence */ + tot = ((srcALen + srcBLen) - 2U); + + if (srcALen > srcBLen) + { + /* Calculating the number of zeros to be padded to the output */ + j = srcALen - srcBLen; + + /* Initialise the pointer after zero padding */ + pDst += j; + } + + else if (srcALen < srcBLen) + { + /* Initialization to inputB pointer */ + pIn1 = pSrcB; + + /* Initialization to the end of inputA pointer */ + pIn2 = pSrcA + (srcALen - 1U); + + /* Initialisation of the pointer after zero padding */ + pDst = pDst + tot; + + /* Swapping the lengths */ + j = srcALen; + srcALen = srcBLen; + srcBLen = j; + + /* Setting the reverse flag */ + inv = 1; + + } + + /* Loop to calculate convolution for output length number of times */ + for (i = 0U; i <= tot; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0; + + /* Loop to perform MAC operations according to convolution equation */ + for (j = 0U; j <= i; j++) + { + /* Check the array limitations */ + if ((((i - j) < srcBLen) && (j < srcALen))) + { + /* z[i] += x[i-j] * y[j] */ + sum += ((q31_t) pIn1[j] * pIn2[-((int32_t) i - j)]); + } + } + /* Store the output in the destination buffer */ + if (inv == 1) + *pDst-- = (q15_t)(sum >> 15U); + else + *pDst++ = (q15_t)(sum >> 15U); + } +} + +void ref_correlate_fast_opt_q15( + q15_t * pSrcA, + uint32_t srcALen, + q15_t * pSrcB, + uint32_t srcBLen, + q15_t * pDst, + q15_t * pScratch) +{ + q15_t *pIn1 = pSrcA; /* inputA pointer */ + q15_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */ + q31_t sum; /* Accumulators */ + uint32_t i = 0U, j; /* loop counters */ + uint32_t inv = 0U; /* Reverse order flag */ + uint32_t tot = 0U; /* Length */ + + /* Calculate the length of the remaining sequence */ + tot = ((srcALen + srcBLen) - 2U); + + if (srcALen > srcBLen) + { + /* Calculating the number of zeros to be padded to the output */ + j = srcALen - srcBLen; + + /* Initialise the pointer after zero padding */ + pDst += j; + } + + else if (srcALen < srcBLen) + { + /* Initialization to inputB pointer */ + pIn1 = pSrcB; + + /* Initialization to the end of inputA pointer */ + pIn2 = pSrcA + (srcALen - 1U); + + /* Initialisation of the pointer after zero padding */ + pDst = pDst + tot; + + /* Swapping the lengths */ + j = srcALen; + srcALen = srcBLen; + srcBLen = j; + + /* Setting the reverse flag */ + inv = 1; + + } + + /* Loop to calculate convolution for output length number of times */ + for (i = 0U; i <= tot; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0; + + /* Loop to perform MAC operations according to convolution equation */ + for (j = 0U; j <= i; j++) + { + /* Check the array limitations */ + if ((((i - j) < srcBLen) && (j < srcALen))) + { + /* z[i] += x[i-j] * y[j] */ + sum += ((q31_t) pIn1[j] * pIn2[-((int32_t) i - j)]); + } + } + /* Store the output in the destination buffer */ + if (inv == 1) + *pDst-- = (q15_t) ref_sat_q15(sum >> 15U); + else + *pDst++ = (q15_t) ref_sat_q15(sum >> 15U); + } +} + +void ref_correlate_q7( + q7_t * pSrcA, + uint32_t srcALen, + q7_t * pSrcB, + uint32_t srcBLen, + q7_t * pDst) +{ + q7_t *pIn1 = pSrcA; /* inputA pointer */ + q7_t *pIn2 = pSrcB + (srcBLen - 1U); /* inputB pointer */ + q31_t sum; /* Accumulator */ + uint32_t i = 0U, j; /* loop counters */ + uint32_t inv = 0U; /* Reverse order flag */ + uint32_t tot = 0U; /* Length */ + + /* Calculate the length of the remaining sequence */ + tot = ((srcALen + srcBLen) - 2U); + + if (srcALen > srcBLen) + { + /* Calculating the number of zeros to be padded to the output */ + j = srcALen - srcBLen; + + /* Initialise the pointer after zero padding */ + pDst += j; + } + + else if (srcALen < srcBLen) + { + /* Initialization to inputB pointer */ + pIn1 = pSrcB; + + /* Initialization to the end of inputA pointer */ + pIn2 = pSrcA + (srcALen - 1U); + + /* Initialisation of the pointer after zero padding */ + pDst = pDst + tot; + + /* Swapping the lengths */ + j = srcALen; + srcALen = srcBLen; + srcBLen = j; + + /* Setting the reverse flag */ + inv = 1; + + } + + /* Loop to calculate convolution for output length number of times */ + for (i = 0U; i <= tot; i++) + { + /* Initialize sum with zero to carry on MAC operations */ + sum = 0; + + /* Loop to perform MAC operations according to convolution equation */ + for (j = 0U; j <= i; j++) + { + /* Check the array limitations */ + if ((((i - j) < srcBLen) && (j < srcALen))) + { + /* z[i] += x[i-j] * y[j] */ + sum += ((q15_t) pIn1[j] * pIn2[-((int32_t) i - j)]); + } + } + /* Store the output in the destination buffer */ + if (inv == 1) + *pDst-- = (q7_t) __SSAT((sum >> 7U), 8U); + else + *pDst++ = (q7_t) __SSAT((sum >> 7U), 8U); + } +} diff --git a/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir.c b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir.c new file mode 100644 index 0000000..3e72b87 --- /dev/null +++ b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir.c @@ -0,0 +1,325 @@ +#include "ref.h" + +void ref_fir_f32( + const arm_fir_instance_f32 * S, + float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize) +{ + float32_t *pState = S->pState; /* State pointer */ + float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + float32_t *pStateCurnt; /* Points to the current sample of the state */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t i; /* Loop counters */ + float32_t acc; + + /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = &(S->pState[(numTaps - 1U)]); + + while (blockSize > 0U) + { + /* Copy one sample at a time into state buffer */ + *pStateCurnt++ = *pSrc++; + + /* Set the accumulator to zero */ + acc = 0.0f; + + for(i=0;ipState; + + /* Copy data */ + for(i=0;ipState; /* State pointer */ + q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q31_t *pStateCurnt; /* Points to the current sample of the state */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t i; /* Loop counters */ + q63_t acc; + + /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = &(S->pState[(numTaps - 1U)]); + + while (blockSize > 0U) + { + /* Copy one sample at a time into state buffer */ + *pStateCurnt++ = *pSrc++; + + /* Set the accumulator to zero */ + acc = 0.0f; + + for(i=0;i> 31); + + /* Advance state pointer by 1 for the next sample */ + pState++; + + blockSize--; + } + + /* Processing is complete. + ** Now copy the last numTaps - 1 samples to the starting of the state buffer. + ** This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + /* Copy data */ + for(i=0;ipState; /* State pointer */ + q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q31_t *pStateCurnt; /* Points to the current sample of the state */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t i; /* Loop counters */ + q31_t acc; + + /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = &(S->pState[(numTaps - 1U)]); + + while (blockSize > 0U) + { + /* Copy one sample at a time into state buffer */ + *pStateCurnt++ = *pSrc++; + + /* Set the accumulator to zero */ + acc = 0.0f; + + for(i=0;i> 32); + } + + /* The result is store in the destination buffer. */ + *pDst++ = (q31_t)(acc << 1); + + /* Advance state pointer by 1 for the next sample */ + pState++; + + blockSize--; + } + + /* Processing is complete. + ** Now copy the last numTaps - 1 samples to the starting of the state buffer. + ** This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + /* Copy data */ + for(i=0;ipState; /* State pointer */ + q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q15_t *pStateCurnt; /* Points to the current sample of the state */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t i; /* Loop counters */ + q63_t acc; + + /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = &(S->pState[(numTaps - 1U)]); + + while (blockSize > 0U) + { + /* Copy one sample at a time into state buffer */ + *pStateCurnt++ = *pSrc++; + + /* Set the accumulator to zero */ + acc = 0.0f; + + for(i=0;i> 15); + + /* Advance state pointer by 1 for the next sample */ + pState++; + + blockSize--; + } + + /* Processing is complete. + ** Now copy the last numTaps - 1 samples to the starting of the state buffer. + ** This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + /* Copy data */ + for(i=0;ipState; /* State pointer */ + q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q15_t *pStateCurnt; /* Points to the current sample of the state */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t i; /* Loop counters */ + q31_t acc; + + /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = &(S->pState[(numTaps - 1U)]); + + while (blockSize > 0U) + { + /* Copy one sample at a time into state buffer */ + *pStateCurnt++ = *pSrc++; + + /* Set the accumulator to zero */ + acc = 0.0f; + + for(i=0;i> 15); + + /* Advance state pointer by 1 for the next sample */ + pState++; + + blockSize--; + } + + /* Processing is complete. + ** Now copy the last numTaps - 1 samples to the starting of the state buffer. + ** This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + /* Copy data */ + for(i=0;ipState; /* State pointer */ + q7_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q7_t *pStateCurnt; /* Points to the current sample of the state */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t i; /* Loop counters */ + q31_t acc; + + /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = &(S->pState[(numTaps - 1U)]); + + while (blockSize > 0U) + { + /* Copy one sample at a time into state buffer */ + *pStateCurnt++ = *pSrc++; + + /* Set the accumulator to zero */ + acc = 0.0f; + + for(i=0;i> 7); + + /* Advance state pointer by 1 for the next sample */ + pState++; + + blockSize--; + } + + /* Processing is complete. + ** Now copy the last numTaps - 1 samples to the starting of the state buffer. + ** This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + /* Copy data */ + for(i=0;ipState; /* State pointer */ + float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + float32_t *pStateCurnt; /* Points to the current sample of the state */ + float32_t sum0; /* Accumulator */ + float32_t x0, c0; /* Temporary variables to hold state and coefficient values */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t i, blkCnt; /* Loop counters */ + + /* S->pState buffer contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = S->pState + numTaps - 1U; + + /* Total number of output samples to be computed */ + blkCnt = blockSize / S->M; + + while (blkCnt > 0U) + { + /* Copy decimation factor number of new input samples into the state buffer */ + i = S->M; + + do + { + *pStateCurnt++ = *pSrc++; + } while (--i); + + /* Set accumulator to zero */ + sum0 = 0.0f; + + for(i=0;iM; + + /* The result is in the accumulator, store in the destination buffer. */ + *pDst++ = sum0; + + /* Decrement the loop counter */ + blkCnt--; + } + /* Processing is complete. + ** Now copy the last numTaps - 1 samples to the start of the state buffer. + ** This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + /* Copy numTaps number of values */ + i = numTaps - 1U; + + /* copy data */ + while (i > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + i--; + } +} + +void ref_fir_decimate_q31( + const arm_fir_decimate_instance_q31 * S, + q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize) +{ + q31_t *pState = S->pState; /* State pointer */ + q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q31_t *pStateCurnt; /* Points to the current sample of the state */ + q31_t x0, c0; /* Temporary variables to hold state and coefficient values */ + q63_t sum0; /* Accumulator */ + uint32_t numTaps = S->numTaps; /* Number of taps */ + uint32_t i, blkCnt; /* Loop counters */ + + /* S->pState buffer contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = S->pState + numTaps - 1U; + + /* Total number of output samples to be computed */ + blkCnt = blockSize / S->M; + + while (blkCnt > 0U) + { + /* Copy decimation factor number of new input samples into the state buffer */ + i = S->M; + + do + { + *pStateCurnt++ = *pSrc++; + + } while (--i); + + /* Set accumulator to zero */ + sum0 = 0; + + for(i=0;iM; + + /* The result is in the accumulator, store in the destination buffer. */ + *pDst++ = (q31_t) (sum0 >> 31); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Processing is complete. + ** Now copy the last numTaps - 1 samples to the start of the state buffer. + ** This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + i = numTaps - 1U; + + /* copy data */ + while (i > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + i--; + } +} + +void ref_fir_decimate_fast_q31( + const arm_fir_decimate_instance_q31 * S, + q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize) +{ + q31_t *pState = S->pState; /* State pointer */ + q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q31_t *pStateCurnt; /* Points to the current sample of the state */ + q31_t x0, c0; /* Temporary variables to hold state and coefficient values */ + q31_t sum0; /* Accumulator */ + uint32_t numTaps = S->numTaps; /* Number of taps */ + uint32_t i, blkCnt; /* Loop counters */ + + /* S->pState buffer contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = S->pState + numTaps - 1U; + + /* Total number of output samples to be computed */ + blkCnt = blockSize / S->M; + + while (blkCnt > 0U) + { + /* Copy decimation factor number of new input samples into the state buffer */ + i = S->M; + + do + { + *pStateCurnt++ = *pSrc++; + + } while (--i); + + /* Set accumulator to zero */ + sum0 = 0; + + for(i=0;i> 32); + } + + /* Advance the state pointer by the decimation factor + * to process the next group of decimation factor number samples */ + pState = pState + S->M; + + /* The result is in the accumulator, store in the destination buffer. */ + *pDst++ = (q31_t) (sum0 << 1); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Processing is complete. + ** Now copy the last numTaps - 1 samples to the start of the state buffer. + ** This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + i = numTaps - 1U; + + /* copy data */ + while (i > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + i--; + } +} + +void ref_fir_decimate_q15( + const arm_fir_decimate_instance_q15 * S, + q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize) +{ + q15_t *pState = S->pState; /* State pointer */ + q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q15_t *pStateCurnt; /* Points to the current sample of the state */ + q31_t x0, c0; /* Temporary variables to hold state and coefficient values */ + q63_t sum0; /* Accumulator */ + uint32_t numTaps = S->numTaps; /* Number of taps */ + uint32_t i, blkCnt; /* Loop counters */ + + /* S->pState buffer contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = S->pState + numTaps - 1U; + + /* Total number of output samples to be computed */ + blkCnt = blockSize / S->M; + + while (blkCnt > 0U) + { + /* Copy decimation factor number of new input samples into the state buffer */ + i = S->M; + + do + { + *pStateCurnt++ = *pSrc++; + + } while (--i); + + /* Set accumulator to zero */ + sum0 = 0; + + for(i=0;iM; + + /* The result is in the accumulator, store in the destination buffer. */ + *pDst++ = ref_sat_q15(sum0 >> 15); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Processing is complete. + ** Now copy the last numTaps - 1 samples to the start of the state buffer. + ** This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + i = numTaps - 1U; + + /* copy data */ + while (i > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + i--; + } +} + +void ref_fir_decimate_fast_q15( + const arm_fir_decimate_instance_q15 * S, + q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize) +{ + q15_t *pState = S->pState; /* State pointer */ + q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q15_t *pStateCurnt; /* Points to the current sample of the state */ + q15_t x0, c0; /* Temporary variables to hold state and coefficient values */ + q31_t sum0; /* Accumulator */ + uint32_t numTaps = S->numTaps; /* Number of taps */ + uint32_t i, blkCnt; /* Loop counters */ + + /* S->pState buffer contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = S->pState + numTaps - 1U; + + /* Total number of output samples to be computed */ + blkCnt = blockSize / S->M; + + while (blkCnt > 0U) + { + /* Copy decimation factor number of new input samples into the state buffer */ + i = S->M; + + do + { + *pStateCurnt++ = *pSrc++; + + } while (--i); + + /* Set accumulator to zero */ + sum0 = 0; + + for(i=0;iM; + + /* The result is in the accumulator, store in the destination buffer. */ + *pDst++ = ref_sat_q15(sum0 >> 15); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Processing is complete. + ** Now copy the last numTaps - 1 samples to the start of the state buffer. + ** This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + i = numTaps - 1U; + + /* copy data */ + while (i > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + i--; + } +} + diff --git a/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_interpolate.c b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_interpolate.c new file mode 100644 index 0000000..8abb089 --- /dev/null +++ b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_interpolate.c @@ -0,0 +1,291 @@ +#include "ref.h" + +void ref_fir_interpolate_f32( + const arm_fir_interpolate_instance_f32 * S, + float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize) +{ + float32_t *pState = S->pState; /* State pointer */ + float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + float32_t *pStateCurnt; /* Points to the current sample of the state */ + float32_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */ + float32_t sum; /* Accumulator */ + uint32_t i, blkCnt; /* Loop counters */ + uint16_t phaseLen = S->phaseLength, tapCnt; /* Length of each polyphase filter component */ + + + /* S->pState buffer contains previous frame (phaseLen - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = S->pState + phaseLen - 1; + + /* Total number of intput samples */ + blkCnt = blockSize; + + /* Loop over the blockSize. */ + while (blkCnt > 0U) + { + /* Copy new input sample into the state buffer */ + *pStateCurnt++ = *pSrc++; + + /* Loop over the Interpolation factor. */ + i = S->L; + + while (i > 0U) + { + /* Set accumulator to zero */ + sum = 0.0f; + + /* Initialize state pointer */ + ptr1 = pState; + + /* Initialize coefficient pointer */ + ptr2 = pCoeffs + i - 1; + + /* Loop over the polyPhase length */ + tapCnt = phaseLen; + + while (tapCnt > 0U) + { + /* Perform the multiply-accumulate */ + sum += *ptr1++ * *ptr2; + + /* Increment the coefficient pointer by interpolation factor times. */ + ptr2 += S->L; + + /* Decrement the loop counter */ + tapCnt--; + } + + /* The result is in the accumulator, store in the destination buffer. */ + *pDst++ = sum; + + /* Decrement the loop counter */ + i--; + } + + /* Advance the state pointer by 1 + * to process the next group of interpolation factor number samples */ + pState = pState + 1; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Processing is complete. + ** Now copy the last phaseLen - 1 samples to the start of the state buffer. + ** This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + tapCnt = phaseLen - 1U; + + while (tapCnt > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + tapCnt--; + } + +} + +void ref_fir_interpolate_q31( + const arm_fir_interpolate_instance_q31 * S, + q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize) +{ + q31_t *pState = S->pState; /* State pointer */ + q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q31_t *pStateCurnt; /* Points to the current sample of the state */ + q31_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */ + + /* Run the below code for Cortex-M0 */ + + q63_t sum; /* Accumulator */ + q31_t x0, c0; /* Temporary variables to hold state and coefficient values */ + uint32_t i, blkCnt; /* Loop counters */ + uint16_t phaseLen = S->phaseLength, tapCnt; /* Length of each polyphase filter component */ + + + /* S->pState buffer contains previous frame (phaseLen - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = S->pState + (q31_t)phaseLen - 1; + + /* Total number of intput samples */ + blkCnt = blockSize; + + /* Loop over the blockSize. */ + while (blkCnt > 0U) + { + /* Copy new input sample into the state buffer */ + *pStateCurnt++ = *pSrc++; + + /* Loop over the Interpolation factor. */ + i = S->L; + + while (i > 0U) + { + /* Set accumulator to zero */ + sum = 0; + + /* Initialize state pointer */ + ptr1 = pState; + + /* Initialize coefficient pointer */ + ptr2 = pCoeffs + i - 1; + + tapCnt = phaseLen; + + while (tapCnt > 0U) + { + /* Read the coefficient */ + c0 = *(ptr2); + + /* Increment the coefficient pointer by interpolation factor times. */ + ptr2 += S->L; + + /* Read the input sample */ + x0 = *ptr1++; + + /* Perform the multiply-accumulate */ + sum += (q63_t) x0 *c0; + + /* Decrement the loop counter */ + tapCnt--; + } + + /* The result is in the accumulator, store in the destination buffer. */ + *pDst++ = (q31_t)(sum >> 31); + + /* Decrement the loop counter */ + i--; + } + + /* Advance the state pointer by 1 + * to process the next group of interpolation factor number samples */ + pState = pState + 1; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Processing is complete. + ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer. + ** This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + tapCnt = phaseLen - 1U; + + /* copy data */ + while (tapCnt > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + tapCnt--; + } + +} + +void ref_fir_interpolate_q15( + const arm_fir_interpolate_instance_q15 * S, + q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize) +{ + q15_t *pState = S->pState; /* State pointer */ + q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q15_t *pStateCurnt; /* Points to the current sample of the state */ + q15_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */ + q63_t sum; /* Accumulator */ + q15_t x0, c0; /* Temporary variables to hold state and coefficient values */ + uint32_t i, blkCnt, tapCnt; /* Loop counters */ + uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */ + + + /* S->pState buffer contains previous frame (phaseLen - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = S->pState + phaseLen - 1; + + /* Total number of intput samples */ + blkCnt = blockSize; + + /* Loop over the blockSize. */ + while (blkCnt > 0U) + { + /* Copy new input sample into the state buffer */ + *pStateCurnt++ = *pSrc++; + + /* Loop over the Interpolation factor. */ + i = S->L; + + while (i > 0U) + { + /* Set accumulator to zero */ + sum = 0; + + /* Initialize state pointer */ + ptr1 = pState; + + /* Initialize coefficient pointer */ + ptr2 = pCoeffs + i - 1; + + /* Loop over the polyPhase length */ + tapCnt = (uint32_t)phaseLen; + + while (tapCnt > 0U) + { + /* Read the coefficient */ + c0 = *ptr2; + + /* Increment the coefficient pointer by interpolation factor times. */ + ptr2 += S->L; + + /* Read the input sample */ + x0 = *ptr1++; + + /* Perform the multiply-accumulate */ + sum += (q31_t) x0 * c0; + + /* Decrement the loop counter */ + tapCnt--; + } + + /* Store the result after converting to 1.15 format in the destination buffer */ + *pDst++ = ref_sat_q15(sum >> 15); + + /* Decrement the loop counter */ + i--; + } + + /* Advance the state pointer by 1 + * to process the next group of interpolation factor number samples */ + pState = pState + 1; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Processing is complete. + ** Now copy the last phaseLen - 1 samples to the start of the state buffer. + ** This prepares the state buffer for the next function call. */ + + /* Points to the start of the state buffer */ + pStateCurnt = S->pState; + + i = (uint32_t) phaseLen - 1U; + + while (i > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + i--; + } + +} diff --git a/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_lattice.c b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_lattice.c new file mode 100644 index 0000000..6466106 --- /dev/null +++ b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_lattice.c @@ -0,0 +1,241 @@ +#include "ref.h" + +void ref_fir_lattice_f32( + const arm_fir_lattice_instance_f32 * S, + float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize) +{ + float32_t *pState; /* State pointer */ + float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + float32_t *px; /* temporary state pointer */ + float32_t *pk; /* temporary coefficient pointer */ + float32_t fcurr, fnext, gcurr, gnext; /* temporary variables */ + uint32_t numStages = S->numStages; /* Length of the filter */ + uint32_t blkCnt, stageCnt; /* temporary variables for counts */ + + pState = &S->pState[0]; + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* f0(n) = x(n) */ + fcurr = *pSrc++; + + /* Initialize coeff pointer */ + pk = pCoeffs; + + /* Initialize state pointer */ + px = pState; + + /* read g0(n-1) from state buffer */ + gcurr = *px; + + /* for sample 1 processing */ + /* f1(n) = f0(n) + K1 * g0(n-1) */ + fnext = fcurr + ((*pk) * gcurr); + /* g1(n) = f0(n) * K1 + g0(n-1) */ + gnext = (fcurr * (*pk++)) + gcurr; + + /* save f0(n) in state buffer */ + *px++ = fcurr; + + /* f1(n) is saved in fcurr + for next stage processing */ + fcurr = fnext; + + stageCnt = (numStages - 1U); + + /* stage loop */ + while (stageCnt > 0U) + { + /* read g2(n) from state buffer */ + gcurr = *px; + + /* save g1(n) in state buffer */ + *px++ = gnext; + + /* Sample processing for K2, K3.... */ + /* f2(n) = f1(n) + K2 * g1(n-1) */ + fnext = fcurr + ((*pk) * gcurr); + /* g2(n) = f1(n) * K2 + g1(n-1) */ + gnext = (fcurr * (*pk++)) + gcurr; + + /* f1(n) is saved in fcurr1 + for next stage processing */ + fcurr = fnext; + + stageCnt--; + } + + /* y(n) = fN(n) */ + *pDst++ = fcurr; + + blkCnt--; + } +} + +void ref_fir_lattice_q31( + const arm_fir_lattice_instance_q31 * S, + q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize) +{ + q31_t *pState; /* State pointer */ + q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q31_t *px; /* temporary state pointer */ + q31_t *pk; /* temporary coefficient pointer */ + q31_t fcurr, fnext, gcurr, gnext; /* temporary variables */ + uint32_t numStages = S->numStages; /* Length of the filter */ + uint32_t blkCnt, stageCnt; /* temporary variables for counts */ + + pState = &S->pState[0]; + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* f0(n) = x(n) */ + fcurr = *pSrc++; + + /* Initialize coeff pointer */ + pk = pCoeffs; + + /* Initialize state pointer */ + px = pState; + + /* read g0(n-1) from state buffer */ + gcurr = *px; + + /* for sample 1 processing */ + /* f1(n) = f0(n) + K1 * g0(n-1) */ + fnext = (q31_t) (((q63_t) gcurr * (*pk)) >> 31) + fcurr; + /* g1(n) = f0(n) * K1 + g0(n-1) */ + gnext = (q31_t) (((q63_t) fcurr * (*pk++)) >> 31) + gcurr; + /* save g1(n) in state buffer */ + *px++ = fcurr; + + /* f1(n) is saved in fcurr1 + for next stage processing */ + fcurr = fnext; + + stageCnt = (numStages - 1U); + + /* stage loop */ + while (stageCnt > 0U) + { + /* read g2(n) from state buffer */ + gcurr = *px; + + /* save g1(n) in state buffer */ + *px++ = gnext; + + /* Sample processing for K2, K3.... */ + /* f2(n) = f1(n) + K2 * g1(n-1) */ + fnext = (q31_t) (((q63_t) gcurr * (*pk)) >> 31) + fcurr; + /* g2(n) = f1(n) * K2 + g1(n-1) */ + gnext = (q31_t) (((q63_t) fcurr * (*pk++)) >> 31) + gcurr; + + /* f1(n) is saved in fcurr1 + for next stage processing */ + fcurr = fnext; + + stageCnt--; + + } + + /* y(n) = fN(n) */ + *pDst++ = fcurr; + + blkCnt--; + + } +} + +void ref_fir_lattice_q15( + const arm_fir_lattice_instance_q15 * S, + q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize) +{ + q15_t *pState; /* State pointer */ + q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q15_t *px; /* temporary state pointer */ + q15_t *pk; /* temporary coefficient pointer */ + q31_t fcurnt, fnext, gcurnt, gnext; /* temporary variables */ + uint32_t numStages = S->numStages; /* Length of the filter */ + uint32_t blkCnt, stageCnt; /* temporary variables for counts */ + + pState = &S->pState[0]; + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* f0(n) = x(n) */ + fcurnt = *pSrc++; + + /* Initialize coeff pointer */ + pk = (pCoeffs); + + /* Initialize state pointer */ + px = pState; + + /* read g0(n-1) from state buffer */ + gcurnt = *px; + + /* for sample 1 processing */ + /* f1(n) = f0(n) + K1 * g0(n-1) */ + fnext = ((gcurnt * (*pk)) >> 15U) + fcurnt; + fnext = ref_sat_q15(fnext); + + + /* g1(n) = f0(n) * K1 + g0(n-1) */ + gnext = ((fcurnt * (*pk++)) >> 15U) + gcurnt; + gnext = ref_sat_q15(gnext); + + /* save f0(n) in state buffer */ + *px++ = (q15_t) fcurnt; + + /* f1(n) is saved in fcurnt + for next stage processing */ + fcurnt = fnext; + + stageCnt = (numStages - 1U); + + /* stage loop */ + while (stageCnt > 0U) + { + /* read g1(n-1) from state buffer */ + gcurnt = *px; + + /* save g0(n-1) in state buffer */ + *px++ = (q15_t) gnext; + + /* Sample processing for K2, K3.... */ + /* f2(n) = f1(n) + K2 * g1(n-1) */ + fnext = ((gcurnt * (*pk)) >> 15U) + fcurnt; + fnext = ref_sat_q15(fnext); + + /* g2(n) = f1(n) * K2 + g1(n-1) */ + gnext = ((fcurnt * (*pk++)) >> 15U) + gcurnt; + gnext = ref_sat_q15(gnext); + + + /* f1(n) is saved in fcurnt + for next stage processing */ + fcurnt = fnext; + + stageCnt--; + + } + + /* y(n) = fN(n) */ + *pDst++ = ref_sat_q15(fcurnt); + + + blkCnt--; + + } +} diff --git a/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_sparse.c b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_sparse.c new file mode 100644 index 0000000..0638313 --- /dev/null +++ b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/fir_sparse.c @@ -0,0 +1,485 @@ +#include "ref.h" + +void ref_fir_sparse_f32( + arm_fir_sparse_instance_f32 * S, + float32_t * pSrc, + float32_t * pDst, + float32_t * pScratchIn, + uint32_t blockSize) +{ + float32_t *pState = S->pState; /* State pointer */ + float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + float32_t *px; /* Scratch buffer pointer */ + float32_t *py = pState; /* Temporary pointers for state buffer */ + float32_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */ + float32_t *pOut; /* Destination pointer */ + int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */ + uint32_t delaySize = S->maxDelay + blockSize; /* state length */ + uint16_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + int32_t readIndex; /* Read index of the state buffer */ + uint32_t tapCnt, blkCnt; /* loop counters */ + float32_t coeff = *pCoeffs++; /* Read the first coefficient value */ + + + /* BlockSize of Input samples are copied into the state buffer */ + /* StateIndex points to the starting position to write in the state buffer */ + arm_circularWrite_f32((int32_t *) py, delaySize, &S->stateIndex, 1, + (int32_t *) pSrc, 1, blockSize); + + + /* Read Index, from where the state buffer should be read, is calculated. */ + readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; + + /* Wraparound of readIndex */ + if (readIndex < 0) + { + readIndex += (int32_t) delaySize; + } + + /* Working pointer for state buffer is updated */ + py = pState; + + /* blockSize samples are read from the state buffer */ + arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, + (int32_t *) pb, (int32_t *) pb, blockSize, 1, + blockSize); + + /* Working pointer for the scratch buffer */ + px = pb; + + /* Working pointer for destination buffer */ + pOut = pDst; + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* Perform Multiplications and store in destination buffer */ + *pOut++ = *px++ * coeff; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Loop over the number of taps. */ + tapCnt = (uint32_t) numTaps - 1U; + + while (tapCnt > 0U) + { + /* Load the coefficient value and + * increment the coefficient buffer for the next set of state values */ + coeff = *pCoeffs++; + + /* Read Index, from where the state buffer should be read, is calculated. */ + readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; + + /* Wraparound of readIndex */ + if (readIndex < 0) + { + readIndex += (int32_t) delaySize; + } + + /* Working pointer for state buffer is updated */ + py = pState; + + /* blockSize samples are read from the state buffer */ + arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, + (int32_t *) pb, (int32_t *) pb, blockSize, 1, + blockSize); + + /* Working pointer for the scratch buffer */ + px = pb; + + /* Working pointer for destination buffer */ + pOut = pDst; + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* Perform Multiply-Accumulate */ + *pOut++ += *px++ * coeff; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Decrement the tap loop counter */ + tapCnt--; + } +} + +void ref_fir_sparse_q31( + arm_fir_sparse_instance_q31 * S, + q31_t * pSrc, + q31_t * pDst, + q31_t * pScratchIn, + uint32_t blockSize) +{ + q31_t *pState = S->pState; /* State pointer */ + q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q31_t *px; /* Scratch buffer pointer */ + q31_t *py = pState; /* Temporary pointers for state buffer */ + q31_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */ + q31_t *pOut; /* Destination pointer */ + q63_t out; /* Temporary output variable */ + int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */ + uint32_t delaySize = S->maxDelay + blockSize; /* state length */ + uint16_t numTaps = S->numTaps; /* Filter order */ + int32_t readIndex; /* Read index of the state buffer */ + uint32_t tapCnt, blkCnt; /* loop counters */ + q31_t coeff = *pCoeffs++; /* Read the first coefficient value */ + q31_t in; + + + /* BlockSize of Input samples are copied into the state buffer */ + /* StateIndex points to the starting position to write in the state buffer */ + arm_circularWrite_f32((int32_t *) py, delaySize, &S->stateIndex, 1, + (int32_t *) pSrc, 1, blockSize); + + /* Read Index, from where the state buffer should be read, is calculated. */ + readIndex = (int32_t) (S->stateIndex - blockSize) - *pTapDelay++; + + /* Wraparound of readIndex */ + if (readIndex < 0) + { + readIndex += (int32_t) delaySize; + } + + /* Working pointer for state buffer is updated */ + py = pState; + + /* blockSize samples are read from the state buffer */ + arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, + (int32_t *) pb, (int32_t *) pb, blockSize, 1, + blockSize); + + /* Working pointer for the scratch buffer of state values */ + px = pb; + + /* Working pointer for scratch buffer of output values */ + pOut = pDst; + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* Perform Multiplications and store in the destination buffer */ + *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Loop over the number of taps. */ + tapCnt = (uint32_t) numTaps - 1U; + + while (tapCnt > 0U) + { + /* Load the coefficient value and + * increment the coefficient buffer for the next set of state values */ + coeff = *pCoeffs++; + + /* Read Index, from where the state buffer should be read, is calculated. */ + readIndex = (int32_t) (S->stateIndex - blockSize) - *pTapDelay++; + + /* Wraparound of readIndex */ + if (readIndex < 0) + { + readIndex += (int32_t) delaySize; + } + + /* Working pointer for state buffer is updated */ + py = pState; + + /* blockSize samples are read from the state buffer */ + arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1, + (int32_t *) pb, (int32_t *) pb, blockSize, 1, + blockSize); + + /* Working pointer for the scratch buffer of state values */ + px = pb; + + /* Working pointer for scratch buffer of output values */ + pOut = pDst; + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* Perform Multiply-Accumulate */ + out = *pOut; + out += ((q63_t) * px++ * coeff) >> 32; + *pOut++ = (q31_t) (out); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Decrement the tap loop counter */ + tapCnt--; + } + + /* Working output pointer is updated */ + pOut = pDst; + + /* Output is converted into 1.31 format. */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + in = *pOut << 1; + *pOut++ = in; + + /* Decrement the loop counter */ + blkCnt--; + } +} + +void ref_fir_sparse_q15( + arm_fir_sparse_instance_q15 * S, + q15_t * pSrc, + q15_t * pDst, + q15_t * pScratchIn, + q31_t * pScratchOut, + uint32_t blockSize) +{ + q15_t *pState = S->pState; /* State pointer */ + q15_t *pIn = pSrc; /* Working pointer for input */ + q15_t *pOut = pDst; /* Working pointer for output */ + q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q15_t *px; /* Temporary pointers for scratch buffer */ + q15_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */ + q15_t *py = pState; /* Temporary pointers for state buffer */ + int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */ + uint32_t delaySize = S->maxDelay + blockSize; /* state length */ + uint16_t numTaps = S->numTaps; /* Filter order */ + int32_t readIndex; /* Read index of the state buffer */ + uint32_t tapCnt, blkCnt; /* loop counters */ + q15_t coeff = *pCoeffs++; /* Read the first coefficient value */ + q31_t *pScr2 = pScratchOut; /* Working pointer for pScratchOut */ + + /* BlockSize of Input samples are copied into the state buffer */ + /* StateIndex points to the starting position to write in the state buffer */ + arm_circularWrite_q15(py, delaySize, &S->stateIndex, 1, pIn, 1, blockSize); + + /* Loop over the number of taps. */ + tapCnt = numTaps; + + /* Read Index, from where the state buffer should be read, is calculated. */ + readIndex = (S->stateIndex - blockSize) - *pTapDelay++; + + /* Wraparound of readIndex */ + if (readIndex < 0) + { + readIndex += (int32_t) delaySize; + } + + /* Working pointer for state buffer is updated */ + py = pState; + + /* blockSize samples are read from the state buffer */ + arm_circularRead_q15(py, delaySize, &readIndex, 1, + pb, pb, blockSize, 1, blockSize); + + /* Working pointer for the scratch buffer of state values */ + px = pb; + + /* Working pointer for scratch buffer of output values */ + pScratchOut = pScr2; + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* Perform multiplication and store in the scratch buffer */ + *pScratchOut++ = ((q31_t) * px++ * coeff); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Loop over the number of taps. */ + tapCnt = (uint32_t) numTaps - 1U; + + while (tapCnt > 0U) + { + /* Load the coefficient value and + * increment the coefficient buffer for the next set of state values */ + coeff = *pCoeffs++; + + /* Read Index, from where the state buffer should be read, is calculated. */ + readIndex = (S->stateIndex - blockSize) - *pTapDelay++; + + /* Wraparound of readIndex */ + if (readIndex < 0) + { + readIndex += (int32_t) delaySize; + } + + /* Working pointer for state buffer is updated */ + py = pState; + + /* blockSize samples are read from the state buffer */ + arm_circularRead_q15(py, delaySize, &readIndex, 1, + pb, pb, blockSize, 1, blockSize); + + /* Working pointer for the scratch buffer of state values */ + px = pb; + + /* Working pointer for scratch buffer of output values */ + pScratchOut = pScr2; + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* Perform Multiply-Accumulate */ + *pScratchOut++ += (q31_t) * px++ * coeff; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Decrement the tap loop counter */ + tapCnt--; + } + + /* All the output values are in pScratchOut buffer. + Convert them into 1.15 format, saturate and store in the destination buffer. */ + /* Loop over the blockSize. */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + *pOut++ = (q15_t) __SSAT(*pScr2++ >> 15, 16); + blkCnt--; + } +} + +void ref_fir_sparse_q7( + arm_fir_sparse_instance_q7 * S, + q7_t *pSrc, + q7_t *pDst, + q7_t *pScratchIn, + q31_t * pScratchOut, + uint32_t blockSize) +{ + q7_t *pState = S->pState; /* State pointer */ + q7_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q7_t *px; /* Scratch buffer pointer */ + q7_t *py = pState; /* Temporary pointers for state buffer */ + q7_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */ + q7_t *pOut = pDst; /* Destination pointer */ + int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */ + uint32_t delaySize = S->maxDelay + blockSize; /* state length */ + uint16_t numTaps = S->numTaps; /* Filter order */ + int32_t readIndex; /* Read index of the state buffer */ + uint32_t tapCnt, blkCnt; /* loop counters */ + q7_t coeff = *pCoeffs++; /* Read the coefficient value */ + q31_t *pScr2 = pScratchOut; /* Working pointer for scratch buffer of output values */ + q31_t in; + + /* BlockSize of Input samples are copied into the state buffer */ + /* StateIndex points to the starting position to write in the state buffer */ + arm_circularWrite_q7(py, (int32_t) delaySize, &S->stateIndex, 1, pSrc, 1, + blockSize); + + /* Loop over the number of taps. */ + tapCnt = numTaps; + + /* Read Index, from where the state buffer should be read, is calculated. */ + readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; + + /* Wraparound of readIndex */ + if (readIndex < 0) + { + readIndex += (int32_t) delaySize; + } + + /* Working pointer for state buffer is updated */ + py = pState; + + /* blockSize samples are read from the state buffer */ + arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb, + (int32_t) blockSize, 1, blockSize); + + /* Working pointer for the scratch buffer of state values */ + px = pb; + + /* Working pointer for scratch buffer of output values */ + pScratchOut = pScr2; + + /* Loop over the blockSize */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* Perform multiplication and store in the scratch buffer */ + *pScratchOut++ = ((q31_t) * px++ * coeff); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Loop over the number of taps. */ + tapCnt = (uint32_t) numTaps - 1U; + + while (tapCnt > 0U) + { + /* Load the coefficient value and + * increment the coefficient buffer for the next set of state values */ + coeff = *pCoeffs++; + + /* Read Index, from where the state buffer should be read, is calculated. */ + readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++; + + /* Wraparound of readIndex */ + if (readIndex < 0) + { + readIndex += (int32_t) delaySize; + } + + /* Working pointer for state buffer is updated */ + py = pState; + + /* blockSize samples are read from the state buffer */ + arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb, + (int32_t) blockSize, 1, blockSize); + + /* Working pointer for the scratch buffer of state values */ + px = pb; + + /* Working pointer for scratch buffer of output values */ + pScratchOut = pScr2; + + /* Loop over the blockSize */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* Perform Multiply-Accumulate */ + in = *pScratchOut + ((q31_t) * px++ * coeff); + *pScratchOut++ = in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Decrement the tap loop counter */ + tapCnt--; + } + + /* All the output values are in pScratchOut buffer. + Convert them into 1.15 format, saturate and store in the destination buffer. */ + /* Loop over the blockSize. */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + *pOut++ = (q7_t) __SSAT(*pScr2++ >> 7, 8); + + /* Decrement the blockSize loop counter */ + blkCnt--; + } +} diff --git a/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/iir_lattice.c b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/iir_lattice.c new file mode 100644 index 0000000..ab37d5f --- /dev/null +++ b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/iir_lattice.c @@ -0,0 +1,271 @@ +#include "ref.h" + +void ref_iir_lattice_f32( + const arm_iir_lattice_instance_f32 * S, + float32_t * pSrc, + float32_t * pDst, + uint32_t blockSize) +{ + float32_t fcurr, fnext = 0, gcurr, gnext; /* Temporary variables for lattice stages */ + float32_t acc; /* Accumlator */ + uint32_t blkCnt, tapCnt; /* temporary variables for counts */ + float32_t *px1, *px2, *pk, *pv; /* temporary pointers for state and coef */ + uint32_t numStages = S->numStages; /* number of stages */ + float32_t *pState; /* State pointer */ + float32_t *pStateCurnt; /* State current pointer */ + + blkCnt = blockSize; + pState = &S->pState[0]; + + /* Sample processing */ + while (blkCnt > 0U) + { + /* Read Sample from input buffer */ + /* fN(n) = x(n) */ + fcurr = *pSrc++; + + /* Initialize state read pointer */ + px1 = pState; + /* Initialize state write pointer */ + px2 = pState; + /* Set accumulator to zero */ + acc = 0.0f; + /* Initialize Ladder coeff pointer */ + pv = &S->pvCoeffs[0]; + /* Initialize Reflection coeff pointer */ + pk = &S->pkCoeffs[0]; + + /* Process sample for numStages */ + tapCnt = numStages; + + while (tapCnt > 0U) + { + gcurr = *px1++; + /* Process sample for last taps */ + fnext = fcurr - (*pk) * gcurr; + gnext = fnext * (*pk++) + gcurr; + + /* Output samples for last taps */ + acc += gnext * (*pv++); + *px2++ = gnext; + fcurr = fnext; + + /* Decrementing loop counter */ + tapCnt--; + } + + /* y(n) += g0(n) * v0 */ + acc += fnext * (*pv); + + *px2++ = fnext; + + /* write out into pDst */ + *pDst++ = acc; + + /* Advance the state pointer by 1 to process the next group of samples */ + pState = pState + 1U; + blkCnt--; + } + + /* Processing is complete. Now copy last S->numStages samples to start of the buffer + for the preperation of next frame process */ + + /* Points to the start of the state buffer */ + pStateCurnt = &S->pState[0]; + pState = &S->pState[blockSize]; + + tapCnt = numStages; + + /* Copy the data */ + while (tapCnt > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + tapCnt--; + } +} + +void ref_iir_lattice_q31( + const arm_iir_lattice_instance_q31 * S, + q31_t * pSrc, + q31_t * pDst, + uint32_t blockSize) +{ + q31_t fcurr, fnext = 0, gcurr = 0, gnext; /* Temporary variables for lattice stages */ + q63_t acc; /* Accumlator */ + uint32_t blkCnt, tapCnt; /* Temporary variables for counts */ + q31_t *px1, *px2, *pk, *pv; /* Temporary pointers for state and coef */ + uint32_t numStages = S->numStages; /* number of stages */ + q31_t *pState; /* State pointer */ + q31_t *pStateCurnt; /* State current pointer */ + + blkCnt = blockSize; + pState = &S->pState[0]; + + /* Sample processing */ + while (blkCnt > 0U) + { + /* Read Sample from input buffer */ + /* fN(n) = x(n) */ + fcurr = *pSrc++; + + /* Initialize state read pointer */ + px1 = pState; + /* Initialize state write pointer */ + px2 = pState; + /* Set accumulator to zero */ + acc = 0; + /* Initialize Ladder coeff pointer */ + pv = &S->pvCoeffs[0]; + /* Initialize Reflection coeff pointer */ + pk = &S->pkCoeffs[0]; + + tapCnt = numStages; + + while (tapCnt > 0U) + { + gcurr = *px1++; + /* Process sample */ + /* fN-1(n) = fN(n) - kN * gN-1(n-1) */ + fnext = + ref_sat_q31(((q63_t) fcurr - + ((q31_t) (((q63_t) gcurr * (*pk)) >> 31)))); + /* gN(n) = kN * fN-1(n) + gN-1(n-1) */ + gnext = + ref_sat_q31(((q63_t) gcurr + + ((q31_t) (((q63_t) fnext * (*pk++)) >> 31)))); + /* Output samples */ + /* y(n) += gN(n) * vN */ + acc += ((q63_t) gnext * *pv++); + /* write gN-1(n-1) into state for next sample processing */ + *px2++ = gnext; + /* Update f values for next coefficient processing */ + fcurr = fnext; + + tapCnt--; + } + + /* y(n) += g0(n) * v0 */ + acc += (q63_t) fnext *(*pv++); + + *px2++ = fnext; + + /* write out into pDst */ + *pDst++ = (q31_t) (acc >> 31U); + + /* Advance the state pointer by 1 to process the next group of samples */ + pState = pState + 1U; + blkCnt--; + } + + /* Processing is complete. Now copy last S->numStages samples to start of the buffer + for the preperation of next frame process */ + + /* Points to the start of the state buffer */ + pStateCurnt = &S->pState[0]; + pState = &S->pState[blockSize]; + + tapCnt = numStages; + + /* Copy the remaining q31_t data */ + while (tapCnt > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + tapCnt--; + } +} + +void ref_iir_lattice_q15( + const arm_iir_lattice_instance_q15 * S, + q15_t * pSrc, + q15_t * pDst, + uint32_t blockSize) +{ + q31_t fcurr, fnext = 0, gcurr = 0, gnext; /* Temporary variables for lattice stages */ + uint32_t stgCnt; /* Temporary variables for counts */ + q63_t acc; /* Accumlator */ + uint32_t blkCnt, tapCnt; /* Temporary variables for counts */ + q15_t *px1, *px2, *pk, *pv; /* temporary pointers for state and coef */ + uint32_t numStages = S->numStages; /* number of stages */ + q15_t *pState; /* State pointer */ + q15_t *pStateCurnt; /* State current pointer */ + q15_t out; /* Temporary variable for output */ + + blkCnt = blockSize; + pState = &S->pState[0]; + + /* Sample processing */ + while (blkCnt > 0U) + { + /* Read Sample from input buffer */ + /* fN(n) = x(n) */ + fcurr = *pSrc++; + + /* Initialize state read pointer */ + px1 = pState; + /* Initialize state write pointer */ + px2 = pState; + /* Set accumulator to zero */ + acc = 0; + /* Initialize Ladder coeff pointer */ + pv = &S->pvCoeffs[0]; + /* Initialize Reflection coeff pointer */ + pk = &S->pkCoeffs[0]; + + tapCnt = numStages; + + while (tapCnt > 0U) + { + gcurr = *px1++; + /* Process sample */ + /* fN-1(n) = fN(n) - kN * gN-1(n-1) */ + fnext = fcurr - ((gcurr * (*pk)) >> 15); + fnext = ref_sat_q15(fnext); + /* gN(n) = kN * fN-1(n) + gN-1(n-1) */ + gnext = ((fnext * (*pk++)) >> 15) + gcurr; + gnext = ref_sat_q15(gnext); + /* Output samples */ + /* y(n) += gN(n) * vN */ + acc += (q31_t) ((gnext * (*pv++))); + /* write gN(n) into state for next sample processing */ + *px2++ = (q15_t) gnext; + /* Update f values for next coefficient processing */ + fcurr = fnext; + + tapCnt--; + } + + /* y(n) += g0(n) * v0 */ + acc += (q31_t) ((fnext * (*pv++))); + + out = ref_sat_q15(acc >> 15); + *px2++ = (q15_t) fnext; + + /* write out into pDst */ + *pDst++ = out; + + /* Advance the state pointer by 1 to process the next group of samples */ + pState = pState + 1U; + blkCnt--; + } + + /* Processing is complete. Now copy last S->numStages samples to start of the buffer + for the preperation of next frame process */ + /* Points to the start of the state buffer */ + pStateCurnt = &S->pState[0]; + pState = &S->pState[blockSize]; + + stgCnt = numStages; + + /* copy data */ + while (stgCnt > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + stgCnt--; + } +} diff --git a/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/lms.c b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/lms.c new file mode 100644 index 0000000..fee99f9 --- /dev/null +++ b/fw/hid-dials/Drivers/CMSIS/DSP/DSP_Lib_TestSuite/RefLibs/src/FilteringFunctions/lms.c @@ -0,0 +1,695 @@ +#include "ref.h" + +void ref_lms_f32( + const arm_lms_instance_f32 * S, + float32_t * pSrc, + float32_t * pRef, + float32_t * pOut, + float32_t * pErr, + uint32_t blockSize) +{ + float32_t *pState = S->pState; /* State pointer */ + float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + float32_t *pStateCurnt; /* Points to the current sample of the state */ + float32_t mu = S->mu; /* Adaptive factor */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t i, blkCnt; /* Loop counters */ + float32_t sum, e, d; /* accumulator, error, reference data sample */ + float32_t w = 0.0f; /* weight factor */ + + e = 0.0f; + d = 0.0f; + + /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = &(S->pState[numTaps - 1U]); + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* Copy the new input sample into the state buffer */ + *pStateCurnt++ = *pSrc++; + + /* Set the accumulator to zero */ + sum = 0.0f; + + for(i=0;ipState[i] = pState[i]; + } +} + +void ref_lms_norm_f32( + arm_lms_norm_instance_f32 * S, + float32_t * pSrc, + float32_t * pRef, + float32_t * pOut, + float32_t * pErr, + uint32_t blockSize) +{ + float32_t *pState = S->pState; /* State pointer */ + float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + float32_t *pStateCurnt; /* Points to the current sample of the state */ + float32_t mu = S->mu; /* Adaptive factor */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t i, blkCnt; /* Loop counters */ + float32_t energy; /* Energy of the input */ + float32_t sum, e, d; /* accumulator, error, reference data sample */ + float32_t w, x0, in; /* weight factor, temporary variable to hold input sample and state */ + + /* Initializations of error, difference, Coefficient update */ + e = 0.0f; + d = 0.0f; + w = 0.0f; + + energy = S->energy; + x0 = S->x0; + + /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = &(S->pState[numTaps - 1U]); + + for(blkCnt = blockSize; blkCnt > 0U; blkCnt--) + { + /* Copy the new input sample into the state buffer */ + *pStateCurnt++ = *pSrc; + + /* Read the sample from input buffer */ + in = *pSrc++; + + /* Update the energy calculation */ + energy -= x0 * x0; + energy += in * in; + + /* Set the accumulator to zero */ + sum = 0.0f; + + for(i=0;ienergy = energy; + S->x0 = x0; + + /* Processing is complete. Now copy the last numTaps - 1 samples to the + * start of the state buffer. This prepares the state buffer for the + * next function call. */ + for(i=0;ipState[i] = pState[i]; + } +} + +void ref_lms_q31( + const arm_lms_instance_q31 * S, + q31_t * pSrc, + q31_t * pRef, + q31_t * pOut, + q31_t * pErr, + uint32_t blockSize) +{ + q31_t *pState = S->pState; /* State pointer */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q31_t *pStateCurnt; /* Points to the current sample of the state */ + q31_t mu = S->mu; /* Adaptive factor */ + q31_t *px; /* Temporary pointer for state */ + q31_t *pb; /* Temporary pointer for coefficient buffer */ + uint32_t tapCnt, blkCnt; /* Loop counters */ + q63_t acc; /* Accumulator */ + q31_t e = 0; /* error of data sample */ + q31_t alpha; /* Intermediate constant for taps update */ + q31_t coef; /* Temporary variable for coef */ + q31_t acc_l, acc_h; /* temporary input */ + uint32_t uShift = (uint32_t)S->postShift + 1; + uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */ + + /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = &(S->pState[(numTaps - 1U)]); + + for(blkCnt = blockSize; blkCnt > 0U; blkCnt--) + { + /* Copy the new input sample into the state buffer */ + *pStateCurnt++ = *pSrc++; + + /* Initialize pState pointer */ + px = pState; + + /* Initialize pCoeffs pointer */ + pb = pCoeffs; + + /* Set the accumulator to zero */ + acc = 0; + + /* Loop over numTaps number of values */ + tapCnt = numTaps; + + while (tapCnt > 0U) + { + /* Perform the multiply-accumulate */ + acc += (q63_t)(*px++) * (*pb++); + + /* Decrement the loop counter */ + tapCnt--; + } + + /* Converting the result to 1.31 format */ + /* Store the result from accumulator into the destination buffer. */ + /* Calc lower part of acc */ + acc_l = acc & 0xffffffff; + + /* Calc upper part of acc */ + acc_h = (acc >> 32) & 0xffffffff; + + acc = (uint32_t)acc_l >> lShift | acc_h << uShift; + + *pOut++ = (q31_t)acc; + + /* Compute and store error */ + e = *pRef++ - (q31_t)acc; + + *pErr++ = (q31_t)e; + + /* Weighting factor for the LMS version */ + alpha = (q31_t)(((q63_t)e * mu) >> 31); + + /* Initialize pState pointer */ + /* Advance state pointer by 1 for the next sample */ + px = pState++; + + /* Initialize pCoeffs pointer */ + pb = pCoeffs; + + /* Loop over numTaps number of values */ + tapCnt = numTaps; + + while (tapCnt > 0U) + { + /* Perform the multiply-accumulate */ + coef = (q31_t)(((q63_t) alpha * (*px++)) >> 32); + *pb = ref_sat_q31((q63_t)*pb + (coef << 1)); + pb++; + + /* Decrement the loop counter */ + tapCnt--; + } + } + + /* Processing is complete. Now copy the last numTaps - 1 samples to the + start of the state buffer. This prepares the state buffer for the + next function call. */ + + /* Points to the start of the pState buffer */ + pStateCurnt = S->pState; + + /* Copy (numTaps - 1U) samples */ + tapCnt = numTaps - 1; + + /* Copy the data */ + while (tapCnt > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + tapCnt--; + } +} + +void ref_lms_norm_q31( + arm_lms_norm_instance_q31 * S, + q31_t * pSrc, + q31_t * pRef, + q31_t * pOut, + q31_t * pErr, + uint32_t blockSize) +{ + q31_t *pState = S->pState; /* State pointer */ + q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q31_t *pStateCurnt; /* Points to the current sample of the state */ + q31_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ + q31_t mu = S->mu; /* Adaptive factor */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t tapCnt, blkCnt; /* Loop counters */ + q63_t energy; /* Energy of the input */ + q63_t acc; /* Accumulator */ + q31_t e = 0, d = 0; /* error, reference data sample */ + q31_t w = 0, in; /* weight factor and state */ + q31_t x0; /* temporary variable to hold input sample */ + q63_t errorXmu; /* Temporary variables to store error and mu product and reciprocal of energy */ + q31_t coef; /* Temporary variable for coef */ + q31_t acc_l, acc_h; /* temporary input */ + uint32_t uShift = ((uint32_t) S->postShift + 1U); + uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */ + + energy = S->energy; + x0 = S->x0; + + /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = &(S->pState[(numTaps - 1U)]); + + for(blkCnt = blockSize; blkCnt > 0U; blkCnt--) + { + + /* Copy the new input sample into the state buffer */ + *pStateCurnt++ = *pSrc; + + /* Initialize pState pointer */ + px = pState; + + /* Initialize pCoeffs pointer */ + pb = pCoeffs; + + /* Read the sample from input buffer */ + in = *pSrc++; + + /* Update the energy calculation */ + energy = (q31_t)((((q63_t)energy << 32) - (((q63_t)x0 * x0) << 1)) >> 32) & 0xffffffff; + energy = (q31_t)(((((q63_t)in * in) << 1) + ((q63_t)energy << 32)) >> 32) & 0xffffffff; + + /* Set the accumulator to zero */ + acc = 0; + + /* Loop over numTaps number of values */ + tapCnt = numTaps; + + while (tapCnt > 0U) + { + /* Perform the multiply-accumulate */ + acc += ((q63_t) (*px++)) * (*pb++); + + /* Decrement the loop counter */ + tapCnt--; + } + + /* Converting the result to 1.31 format */ + /* Calc lower part of acc */ + acc_l = acc & 0xffffffff; + + /* Calc upper part of acc */ + acc_h = (acc >> 32) & 0xffffffff; + + acc = (uint32_t)acc_l >> lShift | acc_h << uShift; + + /* Store the result from accumulator into the destination buffer. */ + *pOut++ = (q31_t)acc; + + /* Compute and store error */ + d = *pRef++; + e = d - (q31_t)acc; + *pErr++ = e; + + /* Calculation of product of (e * mu) */ + errorXmu = (q63_t)e * mu; + + /* Weighting factor for the normalized version */ + w = ref_sat_q31(errorXmu / (energy + DELTA_Q31)); + + /* Initialize pState pointer */ + px = pState; + + /* Initialize coeff pointer */ + pb = pCoeffs; + + /* Loop over numTaps number of values */ + tapCnt = numTaps; + + while (tapCnt > 0U) + { + /* Perform the multiply-accumulate */ + /* coef is in 2.30 format */ + coef = (q31_t)(((q63_t)w * (*px++)) >> 32); + /* get coef in 1.31 format by left shifting */ + *pb = ref_sat_q31((q63_t)*pb + (coef << 1U)); + /* update coefficient buffer to next coefficient */ + pb++; + + /* Decrement the loop counter */ + tapCnt--; + } + + /* Read the sample from state buffer */ + x0 = *pState; + + /* Advance state pointer by 1 for the next sample */ + pState++; + } + + /* Save energy and x0 values for the next frame */ + S->energy = (q31_t)energy; + S->x0 = x0; + + /* Processing is complete. Now copy the last numTaps - 1 samples to the + start of the state buffer. This prepares the state buffer for the + next function call. */ + + /* Points to the start of the pState buffer */ + pStateCurnt = S->pState; + + /* Loop for (numTaps - 1U) samples copy */ + tapCnt = numTaps - 1; + + /* Copy the remaining q31_t data */ + while (tapCnt > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + tapCnt--; + } +} + +void ref_lms_q15( + const arm_lms_instance_q15 * S, + q15_t * pSrc, + q15_t * pRef, + q15_t * pOut, + q15_t * pErr, + uint32_t blockSize) +{ + q15_t *pState = S->pState; /* State pointer */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q15_t *pStateCurnt; /* Points to the current sample of the state */ + q15_t mu = S->mu; /* Adaptive factor */ + q15_t *px; /* Temporary pointer for state */ + q15_t *pb; /* Temporary pointer for coefficient buffer */ + uint32_t tapCnt, blkCnt; /* Loop counters */ + q63_t acc; /* Accumulator */ + q15_t e = 0; /* error of data sample */ + q15_t alpha; /* Intermediate constant for taps update */ + q31_t coef; /* Teporary variable for coefficient */ + q31_t acc_l, acc_h; + int32_t lShift = 15 - (int32_t)S->postShift; /* Post shift */ + int32_t uShift = 32 - lShift; + + /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = &(S->pState[(numTaps - 1U)]); + + for(blkCnt = blockSize; blkCnt > 0U; blkCnt--) + { + /* Copy the new input sample into the state buffer */ + *pStateCurnt++ = *pSrc++; + + /* Initialize pState pointer */ + px = pState; + + /* Initialize pCoeffs pointer */ + pb = pCoeffs; + + /* Set the accumulator to zero */ + acc = 0; + + /* Loop over numTaps number of values */ + tapCnt = numTaps; + + while (tapCnt > 0U) + { + /* Perform the multiply-accumulate */ + acc += (q63_t)((q31_t)(*px++) * (*pb++)); + + /* Decrement the loop counter */ + tapCnt--; + } + + /* Calc lower part of acc */ + acc_l = acc & 0xffffffff; + + /* Calc upper part of acc */ + acc_h = (acc >> 32) & 0xffffffff; + + /* Apply shift for lower part of acc and upper part of acc */ + acc = (uint32_t)acc_l >> lShift | acc_h << uShift; + + /* Converting the result to 1.15 format and saturate the output */ + acc = ref_sat_q15(acc); + + /* Store the result from accumulator into the destination buffer. */ + *pOut++ = (q15_t)acc; + + /* Compute and store error */ + e = *pRef++ - (q15_t)acc; + + *pErr++ = (q15_t)e; + + /* Compute alpha i.e. intermediate constant for taps update */ + alpha = (q15_t)(((q31_t)e * mu) >> 15); + + /* Initialize pState pointer */ + /* Advance state pointer by 1 for the next sample */ + px = pState++; + + /* Initialize pCoeffs pointer */ + pb = pCoeffs; + + /* Loop over numTaps number of values */ + tapCnt = numTaps; + + while (tapCnt > 0U) + { + /* Perform the multiply-accumulate */ + coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15); + *pb++ = (q15_t) ref_sat_q15(coef); + + /* Decrement the loop counter */ + tapCnt--; + } + } + + /* Processing is complete. Now copy the last numTaps - 1 samples to the + start of the state buffer. This prepares the state buffer for the + next function call. */ + + /* Points to the start of the pState buffer */ + pStateCurnt = S->pState; + + /* Copy (numTaps - 1U) samples */ + tapCnt = numTaps - 1; + + /* Copy the data */ + while (tapCnt > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + tapCnt--; + } +} + +void ref_lms_norm_q15( + arm_lms_norm_instance_q15 * S, + q15_t * pSrc, + q15_t * pRef, + q15_t * pOut, + q15_t * pErr, + uint32_t blockSize) +{ + q15_t *pState = S->pState; /* State pointer */ + q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ + q15_t *pStateCurnt; /* Points to the current sample of the state */ + q15_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ + q15_t mu = S->mu; /* Adaptive factor */ + uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ + uint32_t tapCnt, blkCnt; /* Loop counters */ + q31_t energy; /* Energy of the input */ + q63_t acc; /* Accumulator */ + q15_t e = 0, d = 0; /* error, reference data sample */ + q15_t w = 0, in; /* weight factor and state */ + q15_t x0; /* temporary variable to hold input sample */ + q15_t errorXmu, oneByEnergy; /* Temporary variables to store error and mu product and reciprocal of energy */ + //q31_t errorXmu; /* Temporary variables to store error and mu product and reciprocal of energy */ + q15_t postShift; /* Post shift to be applied to weight after reciprocal calculation */ + q31_t coef; /* Teporary variable for coefficient */ + q31_t acc_l, acc_h; + int32_t lShift = 15 - (int32_t)S->postShift; /* Post shift */ + int32_t uShift = 32 - lShift; + + energy = S->energy; + x0 = S->x0; + + /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ + /* pStateCurnt points to the location where the new input data should be written */ + pStateCurnt = &(S->pState[(numTaps - 1U)]); + + for(blkCnt = blockSize; blkCnt > 0U; blkCnt--) + { + /* Copy the new input sample into the state buffer */ + *pStateCurnt++ = *pSrc; + + /* Initialize pState pointer */ + px = pState; + + /* Initialize pCoeffs pointer */ + pb = pCoeffs; + + /* Read the sample from input buffer */ + in = *pSrc++; + + /* Update the energy calculation */ + energy -= (((q31_t)x0 * x0) >> 15) & 0xffff; + energy += (((q31_t)in * in) >> 15) & 0xffff; + + /* Set the accumulator to zero */ + acc = 0; + + /* Loop over numTaps number of values */ + tapCnt = numTaps; + + while (tapCnt > 0U) + { + /* Perform the multiply-accumulate */ + acc += (q31_t)*px++ * (*pb++); + + /* Decrement the loop counter */ + tapCnt--; + } + + /* Calc lower part of acc */ + acc_l = acc & 0xffffffff; + + /* Calc upper part of acc */ + acc_h = (acc >> 32) & 0xffffffff; + + /* Apply shift for lower part of acc and upper part of acc */ + acc = (uint32_t) acc_l >> lShift | acc_h << uShift; + + /* Converting the result to 1.15 format and saturate the output */ + acc = ref_sat_q15(acc); + + /* Store the result from accumulator into the destination buffer. */ + *pOut++ = (q15_t) acc; + + /* Compute and store error */ + d = *pRef++; + e = d - (q15_t) acc; + *pErr++ = e; + +#if 0 + /* Calculation of e * mu value */ + errorXmu = (q31_t) e * mu; + + /* Calculation of (e * mu) /energy value */ + acc = errorXmu / (energy + DELTA_Q15); +#endif + + /* Calculation of 1/energy */ + postShift = arm_recip_q15((q15_t) energy + DELTA_Q15, + &oneByEnergy, S->recipTable); + + /* Calculation of e * mu value */ + errorXmu = (q15_t) (((q31_t) e * mu) >> 15); + + /* Calculation of (e * mu) * (1/energy) value */ + acc = (((q31_t) errorXmu * oneByEnergy) >> (15 - postShift)); + + /* Weighting factor for the normalized version */ + w = ref_sat_q15((q31_t)acc); + + /* Initialize pState pointer */ + px = pState; + + /* Initialize coeff pointer */ + pb = pCoeffs; + + /* Loop over numTaps number of values */ + tapCnt = numTaps; + + while (tapCnt > 0U) + { + /* Perform the multiply-accumulate */ + coef = *pb + (((q31_t)w * (*px++)) >> 15); + *pb++ = ref_sat_q15(coef); + + /* Decrement the loop counter */ + tapCnt--; + } + + /* Read the sample from state buffer */ + x0 = *pState; + + /* Advance state pointer by 1 for the next sample */ + pState = pState + 1U; + } + + /* Save energy and x0 values for the next frame */ + S->energy = (q15_t)energy; + S->x0 = x0; + + /* Processing is complete. Now copy the last numTaps - 1 samples to the + satrt of the state buffer. This prepares the state buffer for the + next function call. */ + + /* Points to the start of the pState buffer */ + pStateCurnt = S->pState; + + /* copy (numTaps - 1U) data */ + tapCnt = numTaps - 1; + + /* copy data */ + while (tapCnt > 0U) + { + *pStateCurnt++ = *pState++; + + /* Decrement the loop counter */ + tapCnt--; + } +} -- cgit