From 6ab94e0b318884bbcb95e2ea3835f951502e1d99 Mon Sep 17 00:00:00 2001 From: jaseg Date: Wed, 14 Oct 2020 12:47:28 +0200 Subject: Move firmware into subdirectory --- .../nn_test/arm_nnexamples_nn_test.cpp | 801 +++++++++++++++++++++ 1 file changed, 801 insertions(+) create mode 100644 fw/midi-dials/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/arm_nnexamples_nn_test.cpp (limited to 'fw/midi-dials/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/arm_nnexamples_nn_test.cpp') diff --git a/fw/midi-dials/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/arm_nnexamples_nn_test.cpp b/fw/midi-dials/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/arm_nnexamples_nn_test.cpp new file mode 100644 index 0000000..41088fe --- /dev/null +++ b/fw/midi-dials/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/arm_nnexamples_nn_test.cpp @@ -0,0 +1,801 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010-2018 Arm Limited. All rights reserved. +* +* +* Project: CMSIS NN Library +* Title: arm_nnexamples_nn_test.cpp +* +* Description: Example code for NN kernel testing. +* +* Target Processor: Cortex-M cores +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* - Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* - Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided with the +* distribution. +* - Neither the name of ARM LIMITED nor the names of its contributors +* may be used to endorse or promote products derived from this +* software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +* POSSIBILITY OF SUCH DAMAGE. +* -------------------------------------------------------------------- */ + +#include "arm_nnexamples_nn_test.h" + +//#define TEST_SIGMOID +//#define TEST_TANH +#define TEST_POOL +#define TEST_RELU +#define TEST_IP +#define TEST_CONV +#define TEST_NONSQUARE +#define TEST_NNMULT + +int test_index = 0; +q7_t test_flags[50]; +bool test_pass; + +int main() +{ + printf("start tests\n"); + + srand(1); + + // common pointers for testing data + q7_t *test1; + q15_t *test2; + q7_t *test3; + q15_t *test4; + + for (test_index = 0; test_index<50; test_index++) { + test_flags[test_index] = -1; + } + test_index = 0; + +#ifdef TEST_NNMULT +#define NNMULT_DIM 128 + test1 = new q7_t[NNMULT_DIM*2]; + test2 = new q15_t[NNMULT_DIM*2]; + test3 = new q7_t[NNMULT_DIM*2]; + test4 = new q15_t[NNMULT_DIM*2]; + + q7_t * mult_out_q7 = test3; + q7_t * mult_ref_q7 = test3 + NNMULT_DIM; + q15_t * mult_out_q15 = test4; + q15_t * mult_ref_q15 = test4 + NNMULT_DIM; + + for (int i=0;i= 2 || pool_out_opt[i] - pool_out_ref[i] >= 2) + { + printf("Output mismatch at %d, expected %d, actual %d\n", i, pool_out_ref[i], pool_out_opt[i]); + if_ave_pool_match = false; + } + } + if (if_ave_pool_match == true) + { + printf("Outputs match.\n"); + } + + delete[]test1; + delete[]test2; + delete[]test3; + +#endif + +#ifdef TEST_RELU + +#define RELU_DIM 127 + + test1 = new q7_t[RELU_DIM]; + test2 = new q15_t[RELU_DIM]; + test3 = new q7_t[RELU_DIM]; + test4 = new q15_t[RELU_DIM]; + + for (int i = 0; i < RELU_DIM; i++) + { + test1[i] = (rand() % 256 - 128); + test2[i] = (rand() % 65536 - 32768); + test3[i] = test1[i]; + test4[i] = test2[i]; + } + + q7_t *relu_ref_data_q7 = test1; + q7_t *relu_opt_data_q7 = test3; + q15_t *relu_ref_data_q15 = test2; + q15_t *relu_opt_data_q15 = test4; + + printf("Start ref relu q7 implementation\n"); + + arm_relu_q7_ref(relu_ref_data_q7, RELU_DIM); + + printf("Start opt relu q7 implementation\n"); + + arm_relu_q7(relu_opt_data_q7, RELU_DIM); + + verify_results_q7(relu_ref_data_q7, relu_opt_data_q7, RELU_DIM); + + printf("Start ref relu q15 implementation\n"); + + arm_relu_q15_ref(relu_ref_data_q15, RELU_DIM); + + printf("Start opt relu q15 implementation\n"); + + arm_relu_q15(relu_opt_data_q15, RELU_DIM); + + verify_results_q15(relu_ref_data_q15, relu_opt_data_q15, RELU_DIM); + + delete[]test1; + delete[]test2; + delete[]test3; + delete[]test4; + +#endif + +#ifdef TEST_IP + +#define IP_ROW_DIM 127 +#define IP_COL_DIM 127 + + q7_t ip_weights[IP_ROW_DIM * IP_COL_DIM] = IP2_WEIGHT; + q7_t ip_q7_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_WEIGHT; + q7_t ip_q7_q15_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_q7_q15_WEIGHT; + q15_t ip_q15_weights[IP_ROW_DIM * IP_COL_DIM] = IP2_WEIGHT; + q15_t ip_q15_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_WEIGHT_Q15; + + test1 = new q7_t[IP_COL_DIM + IP_ROW_DIM]; + test2 = new q15_t[IP_COL_DIM]; + test3 = new q7_t[IP_ROW_DIM * 3]; + test4 = new q15_t[IP_COL_DIM + IP_ROW_DIM * 2]; + + for (int i = 0; i < IP_ROW_DIM + IP_COL_DIM; i++) + { + test1[i] = rand() % 256 - 100; + } + for (int i = 0; i < IP_ROW_DIM * 3; i++) + { + test3[i] = 0; + } + + q7_t *ip_bias_q7 = test1 + IP_COL_DIM; + + q7_t *ip_out_q7_ref = test3; + q7_t *ip_out_q7_opt = test3 + IP_ROW_DIM; + q7_t *ip_out_q7_opt_fast = test3 + 2 * IP_ROW_DIM; + q15_t *ip_out_q15_ref = test4 + IP_COL_DIM; + q15_t *ip_out_q15_opt = test4 + IP_COL_DIM + IP_ROW_DIM; + + initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt, IP_ROW_DIM); + initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM); + initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM); + + printf("Start ref q7 implementation\n"); + + arm_fully_connected_q7_ref(test1, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_ref, test2); + + printf("Start q7 implementation\n"); + + arm_fully_connected_q7(test1, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_opt, test2); + + verify_results_q7(ip_out_q7_ref, ip_out_q7_opt, IP_ROW_DIM); + + printf("Start q7 ref opt implementation\n"); + + arm_fully_connected_q7_opt_ref(test1, ip_q7_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, + ip_out_q7_opt_fast, test2); + + verify_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM); + + printf("Start q7 opt implementation\n"); + + arm_fully_connected_q7_opt(test1, ip_q7_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_opt_fast, + test2); + + verify_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM); + + for (int i = 0; i < IP_ROW_DIM + IP_COL_DIM; i++) + { + test4[i] = (rand() % 65536 - 32768); + } + + initialize_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); + + printf("Start ref q15 implementation\n"); + + arm_fully_connected_q15_ref(test4, ip_q15_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_ref, NULL); + + printf("Start q15 implementation\n"); + + arm_fully_connected_q15(test4, ip_q15_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, NULL); + + verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); + + printf("Start ref opt q15 implementation\n"); + + arm_fully_connected_q15_opt_ref(test4, ip_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, + NULL); + + verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); + + printf("Start opt q15 implementation\n"); + + arm_fully_connected_q15_opt(test4, ip_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, NULL); + + verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); + + initialize_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); + + printf("Start ref q7_q15 implementation\n"); + + arm_fully_connected_mat_q7_vec_q15_ref(test4, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q15_ref, + test2); + + printf("Start q7_q15 implementation\n"); + + arm_fully_connected_mat_q7_vec_q15(test4, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q15_opt, + test2); + + verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); + + printf("Start ref opt q7_q15 implementation\n"); + + arm_fully_connected_mat_q7_vec_q15_opt_ref(test4, ip_q7_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, + ip_out_q15_opt, test2); + + verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); + + printf("Start opt q7_q15 implementation\n"); + + arm_fully_connected_mat_q7_vec_q15_opt(test4, ip_q7_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, + ip_out_q15_opt, test2); + + verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM); + + delete[]test1; + delete[]test2; + delete[]test3; + delete[]test4; + +#endif + +#ifdef TEST_NONSQUARE + +/* Use RCONV to differential with square CONV */ + +#define RCONV_IM_DIM_X 10 +#define RCONV_IM_DIM_Y 8 +#define RCONV_IM_CH 4 +#define RCONV_KER_DIM_X 5 +#define RCONV_KER_DIM_Y 3 +#define RCONV_STRIDE_X 1 +#define RCONV_STRIDE_Y 1 +#define RCONV_PADDING_X 2 +#define RCONV_PADDING_Y 1 +#define RCONV_OUT_CH 4 +#define RCONV_OUT_DIM_X 10 +#define RCONV_OUT_DIM_Y 8 + + test1 = new q7_t[RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH]; + test2 = new q15_t[2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH]; + test3 = + new q7_t[RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH]; + + for (int i = 0; i < RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH; i++) + { + test1[i] = rand() % 256 - 100; + } + + for (int i = 0; + i < RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH; i++) + { + test3[i] = rand() % 256 - 100; + } + + q7_t *rconv_weight_q7 = test1; + q7_t *rconv_bias_q7 = test1 + RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH; + + q15_t *rconv_buf = test2; + + q7_t *rconv_im_in_q7 = test3; + q7_t *rconv_im_out_ref_q7 = test3 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH; + q7_t *rconv_im_out_opt_q7 = + test3 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH; + + initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); + + printf("start conv q7 nonsquare ref implementation\n"); + arm_convolve_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7, + RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y, + RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, + RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL); + + printf("start conv q7 nonsquare opt implementation\n"); + arm_convolve_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7, + RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y, + RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, + RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL); + + verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); + + initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); + + printf("start conv q7 nonsquare ref implementation\n"); + arm_convolve_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7, + RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y, + RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, + RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL); + + printf("start conv q7 nonsquare basic implementation\n"); + arm_convolve_HWC_q7_basic_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7, + RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y, + RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, + RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL); + + verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); + + initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); + + printf("start 1x1 conv q7 nonsquare fast implementation\n"); + arm_convolve_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7, + RCONV_OUT_CH, 1, 1, 0, 0, RCONV_STRIDE_X, + RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, RCONV_OUT_DIM_X, + RCONV_OUT_DIM_Y, rconv_buf, NULL); + + printf("start 1x1 conv q7 nonsquare dedicated function implementation\n"); + arm_convolve_1x1_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7, + RCONV_OUT_CH, 1, 1, 0, 0, RCONV_STRIDE_X, + RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, RCONV_OUT_DIM_X, + RCONV_OUT_DIM_Y, rconv_buf, NULL); + + verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); + + printf("start depthwise separable conv q7 nonsquare ref implementation\n"); + arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, + rconv_weight_q7, RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, + RCONV_PADDING_X, RCONV_PADDING_Y, RCONV_STRIDE_X, RCONV_STRIDE_Y, + rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, RCONV_OUT_DIM_X, + RCONV_OUT_DIM_Y, rconv_buf, NULL); + + printf("start depthwise separable conv q7 nonsquare opt implementation\n"); + arm_depthwise_separable_conv_HWC_q7_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, + rconv_weight_q7, RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, + RCONV_PADDING_X, RCONV_PADDING_Y, RCONV_STRIDE_X, RCONV_STRIDE_Y, + rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, RCONV_OUT_DIM_X, + RCONV_OUT_DIM_Y, rconv_buf, NULL); + + verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); + + delete[]test1; + delete[]test2; + delete[]test3; + + test2 = new q15_t[RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH]; // weights + bias + test4 = new q15_t[2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH //buffer + + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH]; // i/o + + for (int i = 0; i < RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH; i++) + { + test2[i] = rand() % 256 - 100; + } + + for (int i = 0; + i < 2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH + + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH; + i++) + { + test4[i] = rand() % 256 - 100; + } + + q15_t *rconv_weight_q15 = test2; + q15_t *rconv_bias_q15 = test2 + RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH; + + rconv_buf = test4; + + q15_t *rconv_im_in_q15 = test4 + 2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH; + q15_t *rconv_im_out_ref_q15 = rconv_im_in_q15 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH; + q15_t *rconv_im_out_opt_q15 = rconv_im_out_ref_q15 + RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH; + + initialize_results_q15(rconv_im_out_ref_q15, rconv_im_out_opt_q15, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); + + printf("start conv q15 nonsquare ref implementation\n"); + arm_convolve_HWC_q15_nonsquare_ref(rconv_im_in_q15, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q15, + RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y, + RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q15, 1, 7, rconv_im_out_ref_q15, + RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL); + + printf("start conv q5 nonsquare opt implementation\n"); + arm_convolve_HWC_q15_fast_nonsquare(rconv_im_in_q15, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q15, + RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y, + RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q15, 1, 7, rconv_im_out_opt_q15, + RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL); + + verify_results_q15(rconv_im_out_ref_q15, rconv_im_out_opt_q15, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH); + + delete [] test2; + delete [] test4; +#endif + +#ifdef TEST_CONV + +#define CONV_IM_DIM 16 +#define CONV_IM_CH 16 +#define CONV_KER_DIM 5 +#define CONV_OUT_CH 16 +#define CONV_OUT_DIM 16 + + test1 = new q7_t[CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH]; + test2 = + new q15_t[CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + + 2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH]; + test3 = new q7_t[CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH]; + test4 = new q15_t[CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH]; + + for (int i = 0; i < CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH; i++) + { + test1[i] = rand() % 256 - 100; + } + + for (int i = 0; + i < + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + + 2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH; i++) + { + test2[i] = (rand() % 65536 - 32768); + } + + for (int i = 0; i < CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; i++) + { + test3[i] = rand() % 256 - 100; + } + + for (int i = 0; i < CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; i++) + { + test4[i] = (rand() % 65536 - 32768); + } + + q7_t *conv_weight_q7 = test1; + q7_t *conv_bias_q7 = test1 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH; + + q15_t *conv_weight_q15 = test2; + q15_t *conv_buf = test2 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH; + q15_t *conv_bias_q15 = + test2 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + + 2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH; + + q7_t *conv_im_in_q7 = test3; + q7_t *conv_im_out_ref_q7 = test3 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH; + q7_t *conv_im_out_opt_q7 = + test3 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; + + q15_t *conv_im_in_q15 = test4; + q15_t *conv_im_out_ref_q15 = test4 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH; + q15_t *conv_im_out_opt_q15 = + test4 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; + + initialize_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); + + printf("start q7 ref implementation\n"); + + arm_convolve_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7, + CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7, + CONV_OUT_DIM, conv_buf, NULL); + + printf("start q7 basic implementation\n"); + + arm_convolve_HWC_q7_basic(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7, + CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7, + CONV_OUT_DIM, conv_buf, NULL); + + verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); + + printf("start q7 fast implementation\n"); + + arm_convolve_HWC_q7_fast(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7, + CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7, + CONV_OUT_DIM, conv_buf, NULL); + + verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); + + // testing with RGB + printf("start q7 ref implementation for RGB\n"); + + arm_convolve_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7, + CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7, + CONV_OUT_DIM, conv_buf, NULL); + + printf("start q7 basic implementation for RGB\n"); + + arm_convolve_HWC_q7_basic(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7, + CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7, + CONV_OUT_DIM, conv_buf, NULL); + + verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); + + printf("start q7 RGB implementation for RGB\n"); + + arm_convolve_HWC_q7_RGB(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7, + CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7, + CONV_OUT_DIM, conv_buf, NULL); + + verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); + + // testing q15 + initialize_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); + + printf("start q15 ref implementation\n"); + + arm_convolve_HWC_q15_ref(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15, + CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_ref_q15, + CONV_OUT_DIM, conv_buf, NULL); + + printf("start q15 basic implementation\n"); + + arm_convolve_HWC_q15_basic(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15, + CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_opt_q15, + CONV_OUT_DIM, conv_buf, NULL); + + verify_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); + + printf("start q15 fast implementation\n"); + + arm_convolve_HWC_q15_fast(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15, + CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_opt_q15, + CONV_OUT_DIM, conv_buf, NULL); + + verify_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); + + // depthwise separable conv + initialize_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); + + printf("start q7 depthwise_separable_conv ref implementation\n"); + + arm_depthwise_separable_conv_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7, + CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7, + CONV_OUT_DIM, conv_buf, NULL); + + printf("start q7 depthwise_separable_conv implementation\n"); + + arm_depthwise_separable_conv_HWC_q7(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7, + CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7, + CONV_OUT_DIM, conv_buf, NULL); + + verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH); + + delete[]test1; + delete[]test2; + delete[]test3; + delete[]test4; + +#endif + + test_pass = true; + test_index = 0; + while (test_flags[test_index] != -1) { + if (test_flags[test_index]) { + test_pass = false; + } + test_index ++; + } + if (test_pass) { + printf("All tests passed\n"); + } else { + printf("Test failed passed\n"); + } + + return 0; +} -- cgit