#include "ref.h"

/*
 * Reference correlation kernels.
 *
 * All functions in this file share the same structure:
 *
 *  - The shorter input is always the one that slides across the longer one.
 *    When srcBLen > srcALen the roles of the two inputs are exchanged and the
 *    results are stored in reverse order (CORR(x, y) is CORR(y, x) reversed).
 *  - srcALen + srcBLen - 1 output samples are computed and stored.
 *  - When the lengths differ, the output is positioned as if the shorter
 *    input had been zero padded to the length of the longer one:
 *      srcALen > srcBLen: the first (srcALen - srcBLen) elements of pDst are
 *                         skipped,
 *      srcALen < srcBLen: the computed samples occupy the first
 *                         (srcALen + srcBLen - 1) elements of pDst.
 *    The skipped elements are NOT written by these functions; a caller that
 *    expects zeroes there has to clear pDst beforehand.
 *  - A destination of 2 * max(srcALen, srcBLen) - 1 elements is sufficient
 *    for every combination of lengths.
 *  - If either length is zero the functions return without touching pDst.
 *
 * The "reversed" operand is addressed as base[(len - 1) - k] with an unsigned,
 * in-range index.  A negative index built from mixed int32_t/uint32_t
 * arithmetic would be evaluated as unsigned and only behaves like a negative
 * offset on targets with 32-bit pointers.
 */

/**
 * @brief Reference correlation of two float32_t sequences.
 * @param[in]  pSrcA    first input sequence
 * @param[in]  srcALen  number of elements in pSrcA
 * @param[in]  pSrcB    second input sequence
 * @param[in]  srcBLen  number of elements in pSrcB
 * @param[out] pDst     destination buffer (see the file comment for layout)
 */
void ref_correlate_f32(
  float32_t * pSrcA,
  uint32_t srcALen,
  float32_t * pSrcB,
  uint32_t srcBLen,
  float32_t * pDst)
{
  float32_t *pFwd = pSrcA;      /* operand read front to back */
  float32_t *pRev = pSrcB;      /* operand read back to front */
  uint32_t fwdLen = srcALen;    /* length of pFwd */
  uint32_t revLen = srcBLen;    /* length of pRev */
  uint32_t dstOff = 0U;         /* leading output elements to skip */
  uint32_t inv = 0U;            /* 1: store results in reverse order */
  uint32_t tot;                 /* index of the last computed sample */
  uint32_t i, j;                /* loop counters */
  float32_t sum;                /* accumulator */

  /* Nothing to correlate; also keeps the length arithmetic from wrapping */
  if ((srcALen == 0U) || (srcBLen == 0U))
  {
    return;
  }

  tot = (srcALen + srcBLen) - 2U;

  if (srcALen > srcBLen)
  {
    /* Skip the elements that correspond to the zero padding */
    dstOff = srcALen - srcBLen;
  }
  else if (srcALen < srcBLen)
  {
    /* Exchange the operands and fill the output backwards */
    pFwd = pSrcB;
    fwdLen = srcBLen;
    pRev = pSrcA;
    revLen = srcALen;
    inv = 1U;
  }

  for (i = 0U; i <= tot; i++)
  {
    sum = 0.0f;

    for (j = 0U; j <= i; j++)
    {
      /* Only accumulate where both operands have a sample */
      if (((i - j) < revLen) && (j < fwdLen))
      {
        sum += pFwd[j] * pRev[(revLen - 1U) - (i - j)];
      }
    }

    if (inv == 1U)
    {
      pDst[tot - i] = sum;
    }
    else
    {
      pDst[dstOff + i] = sum;
    }
  }
}

/**
 * @brief Reference correlation of two q31_t sequences.
 *
 * Products are accumulated in a q63_t and each result is stored as
 * (q31_t)(sum >> 31).
 *
 * @param[in]  pSrcA    first input sequence
 * @param[in]  srcALen  number of elements in pSrcA
 * @param[in]  pSrcB    second input sequence
 * @param[in]  srcBLen  number of elements in pSrcB
 * @param[out] pDst     destination buffer (see the file comment for layout)
 */
void ref_correlate_q31(
  q31_t * pSrcA,
  uint32_t srcALen,
  q31_t * pSrcB,
  uint32_t srcBLen,
  q31_t * pDst)
{
  q31_t *pFwd = pSrcA;          /* operand read front to back */
  q31_t *pRev = pSrcB;          /* operand read back to front */
  uint32_t fwdLen = srcALen;    /* length of pFwd */
  uint32_t revLen = srcBLen;    /* length of pRev */
  uint32_t dstOff = 0U;         /* leading output elements to skip */
  uint32_t inv = 0U;            /* 1: store results in reverse order */
  uint32_t tot;                 /* index of the last computed sample */
  uint32_t i, j;                /* loop counters */
  q63_t sum;                    /* accumulator */

  /* Nothing to correlate; also keeps the length arithmetic from wrapping */
  if ((srcALen == 0U) || (srcBLen == 0U))
  {
    return;
  }

  tot = (srcALen + srcBLen) - 2U;

  if (srcALen > srcBLen)
  {
    /* Skip the elements that correspond to the zero padding */
    dstOff = srcALen - srcBLen;
  }
  else if (srcALen < srcBLen)
  {
    /* Exchange the operands and fill the output backwards */
    pFwd = pSrcB;
    fwdLen = srcBLen;
    pRev = pSrcA;
    revLen = srcALen;
    inv = 1U;
  }

  for (i = 0U; i <= tot; i++)
  {
    sum = 0;

    for (j = 0U; j <= i; j++)
    {
      /* Only accumulate where both operands have a sample */
      if (((i - j) < revLen) && (j < fwdLen))
      {
        sum += ((q63_t) pFwd[j] * pRev[(revLen - 1U) - (i - j)]);
      }
    }

    if (inv == 1U)
    {
      pDst[tot - i] = (q31_t)(sum >> 31U);
    }
    else
    {
      pDst[dstOff + i] = (q31_t)(sum >> 31U);
    }
  }
}

/**
 * @brief Reference for the "fast" q31_t correlation.
 *
 * After every multiply-accumulate only the upper 32 bits of
 * (sum << 32) + product are kept, so the low-order bits of each product are
 * discarded.  The final value is stored as (q31_t)(sum << 1).
 *
 * NOTE(review): the left shifts operate on a signed accumulator; shifting a
 * negative value is formally undefined in ISO C.  The expressions are kept
 * unchanged to preserve the results produced by existing toolchains.
 *
 * @param[in]  pSrcA    first input sequence
 * @param[in]  srcALen  number of elements in pSrcA
 * @param[in]  pSrcB    second input sequence
 * @param[in]  srcBLen  number of elements in pSrcB
 * @param[out] pDst     destination buffer (see the file comment for layout)
 */
void ref_correlate_fast_q31(
  q31_t * pSrcA,
  uint32_t srcALen,
  q31_t * pSrcB,
  uint32_t srcBLen,
  q31_t * pDst)
{
  q31_t *pFwd = pSrcA;          /* operand read front to back */
  q31_t *pRev = pSrcB;          /* operand read back to front */
  uint32_t fwdLen = srcALen;    /* length of pFwd */
  uint32_t revLen = srcBLen;    /* length of pRev */
  uint32_t dstOff = 0U;         /* leading output elements to skip */
  uint32_t inv = 0U;            /* 1: store results in reverse order */
  uint32_t tot;                 /* index of the last computed sample */
  uint32_t i, j;                /* loop counters */
  q63_t sum;                    /* accumulator */

  /* Nothing to correlate; also keeps the length arithmetic from wrapping */
  if ((srcALen == 0U) || (srcBLen == 0U))
  {
    return;
  }

  tot = (srcALen + srcBLen) - 2U;

  if (srcALen > srcBLen)
  {
    /* Skip the elements that correspond to the zero padding */
    dstOff = srcALen - srcBLen;
  }
  else if (srcALen < srcBLen)
  {
    /* Exchange the operands and fill the output backwards */
    pFwd = pSrcB;
    fwdLen = srcBLen;
    pRev = pSrcA;
    revLen = srcALen;
    inv = 1U;
  }

  for (i = 0U; i <= tot; i++)
  {
    sum = 0;

    for (j = 0U; j <= i; j++)
    {
      /* Only accumulate where both operands have a sample */
      if (((i - j) < revLen) && (j < fwdLen))
      {
        /* Keep only the high word of the running 64-bit sum */
        sum = (q31_t) ((((q63_t) sum << 32) +
                        ((q63_t) pFwd[j] * pRev[(revLen - 1U) - (i - j)])) >> 32);
      }
    }

    if (inv == 1U)
    {
      pDst[tot - i] = (q31_t)(sum << 1U);
    }
    else
    {
      pDst[dstOff + i] = (q31_t)(sum << 1U);
    }
  }
}

/**
 * @brief Reference correlation of two q15_t sequences.
 *
 * Products are accumulated in a q63_t; each result is sum >> 15 passed
 * through ref_sat_q15() before being stored.
 *
 * @param[in]  pSrcA    first input sequence
 * @param[in]  srcALen  number of elements in pSrcA
 * @param[in]  pSrcB    second input sequence
 * @param[in]  srcBLen  number of elements in pSrcB
 * @param[out] pDst     destination buffer (see the file comment for layout)
 */
void ref_correlate_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst)
{
  q15_t *pFwd = pSrcA;          /* operand read front to back */
  q15_t *pRev = pSrcB;          /* operand read back to front */
  uint32_t fwdLen = srcALen;    /* length of pFwd */
  uint32_t revLen = srcBLen;    /* length of pRev */
  uint32_t dstOff = 0U;         /* leading output elements to skip */
  uint32_t inv = 0U;            /* 1: store results in reverse order */
  uint32_t tot;                 /* index of the last computed sample */
  uint32_t i, j;                /* loop counters */
  q63_t sum;                    /* accumulator */

  /* Nothing to correlate; also keeps the length arithmetic from wrapping */
  if ((srcALen == 0U) || (srcBLen == 0U))
  {
    return;
  }

  tot = (srcALen + srcBLen) - 2U;

  if (srcALen > srcBLen)
  {
    /* Skip the elements that correspond to the zero padding */
    dstOff = srcALen - srcBLen;
  }
  else if (srcALen < srcBLen)
  {
    /* Exchange the operands and fill the output backwards */
    pFwd = pSrcB;
    fwdLen = srcBLen;
    pRev = pSrcA;
    revLen = srcALen;
    inv = 1U;
  }

  for (i = 0U; i <= tot; i++)
  {
    sum = 0;

    for (j = 0U; j <= i; j++)
    {
      /* Only accumulate where both operands have a sample */
      if (((i - j) < revLen) && (j < fwdLen))
      {
        sum += ((q31_t) pFwd[j] * pRev[(revLen - 1U) - (i - j)]);
      }
    }

    if (inv == 1U)
    {
      pDst[tot - i] = (q15_t) ref_sat_q15(sum >> 15U);
    }
    else
    {
      pDst[dstOff + i] = (q15_t) ref_sat_q15(sum >> 15U);
    }
  }
}

/**
 * @brief Reference for the "fast" q15_t correlation.
 *
 * Products are accumulated in a q63_t and each result is stored as
 * (q15_t)(sum >> 15); unlike ref_correlate_q15() the value is not passed
 * through ref_sat_q15().
 *
 * @param[in]  pSrcA    first input sequence
 * @param[in]  srcALen  number of elements in pSrcA
 * @param[in]  pSrcB    second input sequence
 * @param[in]  srcBLen  number of elements in pSrcB
 * @param[out] pDst     destination buffer (see the file comment for layout)
 */
void ref_correlate_fast_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst)
{
  q15_t *pFwd = pSrcA;          /* operand read front to back */
  q15_t *pRev = pSrcB;          /* operand read back to front */
  uint32_t fwdLen = srcALen;    /* length of pFwd */
  uint32_t revLen = srcBLen;    /* length of pRev */
  uint32_t dstOff = 0U;         /* leading output elements to skip */
  uint32_t inv = 0U;            /* 1: store results in reverse order */
  uint32_t tot;                 /* index of the last computed sample */
  uint32_t i, j;                /* loop counters */
  q63_t sum;                    /* accumulator */

  /* Nothing to correlate; also keeps the length arithmetic from wrapping */
  if ((srcALen == 0U) || (srcBLen == 0U))
  {
    return;
  }

  tot = (srcALen + srcBLen) - 2U;

  if (srcALen > srcBLen)
  {
    /* Skip the elements that correspond to the zero padding */
    dstOff = srcALen - srcBLen;
  }
  else if (srcALen < srcBLen)
  {
    /* Exchange the operands and fill the output backwards */
    pFwd = pSrcB;
    fwdLen = srcBLen;
    pRev = pSrcA;
    revLen = srcALen;
    inv = 1U;
  }

  for (i = 0U; i <= tot; i++)
  {
    sum = 0;

    for (j = 0U; j <= i; j++)
    {
      /* Only accumulate where both operands have a sample */
      if (((i - j) < revLen) && (j < fwdLen))
      {
        sum += ((q31_t) pFwd[j] * pRev[(revLen - 1U) - (i - j)]);
      }
    }

    if (inv == 1U)
    {
      pDst[tot - i] = (q15_t)(sum >> 15U);
    }
    else
    {
      pDst[dstOff + i] = (q15_t)(sum >> 15U);
    }
  }
}

/**
 * @brief Reference for the "fast opt" q15_t correlation.
 *
 * Products are accumulated in a q31_t; each result is sum >> 15 passed
 * through ref_sat_q15() before being stored.
 *
 * @param[in]  pSrcA     first input sequence
 * @param[in]  srcALen   number of elements in pSrcA
 * @param[in]  pSrcB     second input sequence
 * @param[in]  srcBLen   number of elements in pSrcB
 * @param[out] pDst      destination buffer (see the file comment for layout)
 * @param[in]  pScratch  not used by this reference implementation
 */
void ref_correlate_fast_opt_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  q15_t * pScratch)
{
  q15_t *pFwd = pSrcA;          /* operand read front to back */
  q15_t *pRev = pSrcB;          /* operand read back to front */
  uint32_t fwdLen = srcALen;    /* length of pFwd */
  uint32_t revLen = srcBLen;    /* length of pRev */
  uint32_t dstOff = 0U;         /* leading output elements to skip */
  uint32_t inv = 0U;            /* 1: store results in reverse order */
  uint32_t tot;                 /* index of the last computed sample */
  uint32_t i, j;                /* loop counters */
  q31_t sum;                    /* accumulator */

  /* The scratch buffer is part of the interface only */
  (void) pScratch;

  /* Nothing to correlate; also keeps the length arithmetic from wrapping */
  if ((srcALen == 0U) || (srcBLen == 0U))
  {
    return;
  }

  tot = (srcALen + srcBLen) - 2U;

  if (srcALen > srcBLen)
  {
    /* Skip the elements that correspond to the zero padding */
    dstOff = srcALen - srcBLen;
  }
  else if (srcALen < srcBLen)
  {
    /* Exchange the operands and fill the output backwards */
    pFwd = pSrcB;
    fwdLen = srcBLen;
    pRev = pSrcA;
    revLen = srcALen;
    inv = 1U;
  }

  for (i = 0U; i <= tot; i++)
  {
    sum = 0;

    for (j = 0U; j <= i; j++)
    {
      /* Only accumulate where both operands have a sample */
      if (((i - j) < revLen) && (j < fwdLen))
      {
        sum += ((q31_t) pFwd[j] * pRev[(revLen - 1U) - (i - j)]);
      }
    }

    if (inv == 1U)
    {
      pDst[tot - i] = (q15_t) ref_sat_q15(sum >> 15U);
    }
    else
    {
      pDst[dstOff + i] = (q15_t) ref_sat_q15(sum >> 15U);
    }
  }
}

/**
 * @brief Reference correlation of two q7_t sequences.
 *
 * Products are accumulated in a q31_t; each result is
 * __SSAT(sum >> 7, 8) stored as q7_t.
 *
 * @param[in]  pSrcA    first input sequence
 * @param[in]  srcALen  number of elements in pSrcA
 * @param[in]  pSrcB    second input sequence
 * @param[in]  srcBLen  number of elements in pSrcB
 * @param[out] pDst     destination buffer (see the file comment for layout)
 */
void ref_correlate_q7(
  q7_t * pSrcA,
  uint32_t srcALen,
  q7_t * pSrcB,
  uint32_t srcBLen,
  q7_t * pDst)
{
  q7_t *pFwd = pSrcA;           /* operand read front to back */
  q7_t *pRev = pSrcB;           /* operand read back to front */
  uint32_t fwdLen = srcALen;    /* length of pFwd */
  uint32_t revLen = srcBLen;    /* length of pRev */
  uint32_t dstOff = 0U;         /* leading output elements to skip */
  uint32_t inv = 0U;            /* 1: store results in reverse order */
  uint32_t tot;                 /* index of the last computed sample */
  uint32_t i, j;                /* loop counters */
  q31_t sum;                    /* accumulator */

  /* Nothing to correlate; also keeps the length arithmetic from wrapping */
  if ((srcALen == 0U) || (srcBLen == 0U))
  {
    return;
  }

  tot = (srcALen + srcBLen) - 2U;

  if (srcALen > srcBLen)
  {
    /* Skip the elements that correspond to the zero padding */
    dstOff = srcALen - srcBLen;
  }
  else if (srcALen < srcBLen)
  {
    /* Exchange the operands and fill the output backwards */
    pFwd = pSrcB;
    fwdLen = srcBLen;
    pRev = pSrcA;
    revLen = srcALen;
    inv = 1U;
  }

  for (i = 0U; i <= tot; i++)
  {
    sum = 0;

    for (j = 0U; j <= i; j++)
    {
      /* Only accumulate where both operands have a sample */
      if (((i - j) < revLen) && (j < fwdLen))
      {
        sum += ((q15_t) pFwd[j] * pRev[(revLen - 1U) - (i - j)]);
      }
    }

    if (inv == 1U)
    {
      pDst[tot - i] = (q7_t) __SSAT((sum >> 7U), 8U);
    }
    else
    {
      pDst[dstOff + i] = (q7_t) __SSAT((sum >> 7U), 8U);
    }
  }
}