From 6ab94e0b318884bbcb95e2ea3835f951502e1d99 Mon Sep 17 00:00:00 2001
From: jaseg
Date: Wed, 14 Oct 2020 12:47:28 +0200
Subject: Move firmware into subdirectory

---
 .../arm_nn_mat_mult_kernel_q7_q15_reordered.c      | 138 +++++++++++++++++++++
 1 file changed, 138 insertions(+)
 create mode 100644 fw/cdc-dials/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c

diff --git a/fw/cdc-dials/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c b/fw/cdc-dials/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c
new file mode 100644
index 0000000..36af21a
--- /dev/null
+++ b/fw/cdc-dials/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS NN Library
+ * Title:        arm_nn_mat_mult_kernel_q7_q15_reordered.c
+ * Description:  Matrix-multiplication function for convolution with reordered columns
+ *
+ * $Date:        17. January 2018
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor:  Cortex-M cores
+ * -------------------------------------------------------------------- */
+
+#include "arm_nnfunctions.h"
+#include "arm_math.h"
+
+  /**
+   * @brief Matrix-multiplication function for convolution with reordered columns
+   * @param[in]       pA          pointer to operand A
+   * @param[in]       pInBuffer   pointer to operand B, always consists of 2 vectors
+   * @param[in]       ch_im_out   numRow of A
+   * @param[in]       numCol_A    numCol of A
+   * @param[in]       bias_shift  amount of left-shift for bias
+   * @param[in]       out_shift   amount of right-shift for output
+   * @param[in]       bias        the bias
+   * @param[in,out]   pOut        pointer to output
+   * @return     The function returns the incremented output pointer
+   *
+   * @details
+   *
+   * This function assumes that data in pInBuffer are reordered
+   */
+
+q7_t *arm_nn_mat_mult_kernel_q7_q15_reordered(const q7_t * pA,
+                                              const q15_t * pInBuffer,
+                                              const uint16_t ch_im_out,
+                                              const uint16_t numCol_A,
+                                              const uint16_t bias_shift,
+                                              const uint16_t out_shift,
+                                              const q7_t * bias,
+                                              q7_t * pOut)
+{
+
+#if defined (ARM_MATH_DSP)
+    /* set up the second output pointers */
+    q7_t     *pOut2 = pOut + ch_im_out;
+    int       i;
+
+    /* this loop over rows in A */
+    for (i = 0; i < ch_im_out; i += 2)
+    {
+        /* setup pointers for B */
+        const q15_t *pB = pInBuffer;
+        const q15_t *pB2 = pB + numCol_A;
+
+        /* align the second pointer for A */
+        const q7_t *pA2 = pA + numCol_A;
+
+        /* init the sum with bias */
+        q31_t     sum = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
+        q31_t     sum2 = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
+        q31_t     sum3 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift);
+        q31_t     sum4 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift);
+
+        uint16_t  colCnt = numCol_A >> 2;
+        /* accumulate over the vector */
+        while (colCnt)
+        {
+            q31_t     inA11, inA12, inA21, inA22;
+            q31_t     inB1 = *__SIMD32(pB)++;
+            q31_t     inB2 = *__SIMD32(pB2)++;
+
+            pA = (q7_t *) read_and_pad_reordered((void *)pA, &inA11, &inA12);
+            pA2 = (q7_t *) read_and_pad_reordered((void *)pA2, &inA21, &inA22);
+
+            sum = __SMLAD(inA11, inB1, sum);
+            sum2 = __SMLAD(inA11, inB2, sum2);
+            sum3 = __SMLAD(inA21, inB1, sum3);
+            sum4 = __SMLAD(inA21, inB2, sum4);
+
+            inB1 = *__SIMD32(pB)++;
+            inB2 = *__SIMD32(pB2)++;
+
+            sum = __SMLAD(inA12, inB1, sum);
+            sum2 = __SMLAD(inA12, inB2, sum2);
+            sum3 = __SMLAD(inA22, inB1, sum3);
+            sum4 = __SMLAD(inA22, inB2, sum4);
+
+            colCnt--;
+        }                       /* while over colCnt */
+        colCnt = numCol_A & 0x3;
+        while (colCnt)
+        {
+            q7_t      inA1 = *pA++;
+            q15_t     inB1 = *pB++;
+            q7_t      inA2 = *pA2++;
+            q15_t     inB2 = *pB2++;
+
+            sum += inA1 * inB1;
+            sum2 += inA1 * inB2;
+            sum3 += inA2 * inB1;
+            sum4 += inA2 * inB2;
+            colCnt--;
+        }                       /* while over colCnt */
+        *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8);
+        *pOut++ = (q7_t) __SSAT((sum3 >> out_shift), 8);
+        *pOut2++ = (q7_t) __SSAT((sum2 >> out_shift), 8);
+        *pOut2++ = (q7_t) __SSAT((sum4 >> out_shift), 8);
+
+        /* skip the row computed with A2 */
+        pA += numCol_A;
+    }                           /* for over ch_im_out */
+
+    pOut += ch_im_out;
+
+    /* return the new output pointer with offset */
+    return pOut;
+#else
+    /* To be completed */
+    return NULL;
+#endif                          /* ARM_MATH_DSP */
+}
--
cgit
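For reference, below is a minimal caller sketch (not part of the patch) showing how this kernel is typically invoked. It assumes a DSP-capable Cortex-M build with ARM_MATH_DSP defined and the CMSIS-NN headers on the include path; the dimensions, shift values and array names are hypothetical placeholders. Within CMSIS-NN the kernel is normally driven by the im2col-based q7 convolution path (e.g. arm_convolve_HWC_q7_fast), which fills the two-column q15 buffer in the reordered layout the kernel expects.

    /* Illustrative caller sketch, not part of the patch above.
     * Assumptions: ARM_MATH_DSP target, CMSIS-NN headers available,
     * CH_IM_OUT/COL_LEN/out_shift values and buffer names are made up. */
    #include "arm_nnfunctions.h"

    #define CH_IM_OUT 4   /* rows of A: number of output channels (kept even here)   */
    #define COL_LEN   16  /* columns of A: ch_im_in * dim_kernel * dim_kernel        */

    /* A: q7 weight matrix, CH_IM_OUT x COL_LEN, assumed already stored in the
     * interleaved ("reordered") layout this kernel expects.                         */
    static const q7_t  weights[CH_IM_OUT * COL_LEN];
    /* B: two im2col columns in q15, e.g. produced by
     * arm_q7_to_q15_reordered_no_shift() as in the q7 fast convolution path.        */
    static q15_t       im2col_buf[2 * COL_LEN];
    static const q7_t  bias[CH_IM_OUT];
    static q7_t        out[2 * CH_IM_OUT];

    void run_kernel_once(void)
    {
        /* One call produces CH_IM_OUT q7 outputs for each of the two columns and
         * returns the output pointer advanced past both of them.                    */
        q7_t *next = arm_nn_mat_mult_kernel_q7_q15_reordered(weights,
                                                             im2col_buf,
                                                             CH_IM_OUT,
                                                             COL_LEN,
                                                             0,   /* bias_shift (placeholder) */
                                                             9,   /* out_shift  (placeholder) */
                                                             bias,
                                                             out);
        (void)next;  /* next == out + 2 * CH_IM_OUT */
    }

Each call thus consumes one weight matrix and two im2col columns, writing CH_IM_OUT outputs per column and returning the output pointer advanced past both columns, which is why the convolution driver can chain successive calls directly.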