summaryrefslogtreecommitdiff
path: root/cdc-dials/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/ref_functions.h
blob: 5a25ffad11fb849ab9a5f5a9236efd64e114d9dd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
/*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Include guard for the reference-function prototypes declared in this header.
 * NOTE(review): an identifier starting with an underscore followed by an
 * uppercase letter is reserved for the implementation in C. It is left
 * unchanged here because other code may test this macro name.
 */
#ifndef _REF_FUNCTIONS_H_
#define _REF_FUNCTIONS_H_

#include "arm_math.h"
#include "arm_nnfunctions.h"
//#include "arm_nnsupportfunctions.h"
#include "fully_connected_testing_weights.h"

#ifdef __cplusplus
extern    "C"
{
#endif

/*
 *
 * Convolution reference implementation
 *
 */

    /*
     * Prototype of the reference q7 convolution (HWC per the function name).
     * This variant takes a single value each for image dimension, kernel
     * size, padding and stride; the *_nonsquare prototype takes separate
     * x/y values. Only the declaration is visible in this header.
     *
     * bias_shift and out_shift carry no comment on their own line; sibling
     * prototypes in this header describe them as the amount of left-shift
     * for the bias and the amount of right-shift for the output.
     * Returns nothing; the result is delivered through Im_out.
     */
    void      arm_convolve_HWC_q7_ref(const q7_t * Im_in,   // input image (read-only)
                                      const uint16_t dim_im_in, // input image dimension
                                      const uint16_t ch_im_in,  // number of input image channels
                                      const q7_t * wt,  // kernel weights (read-only)
                                      const uint16_t ch_im_out, // number of filters, i.e., output image channels
                                      const uint16_t dim_kernel,    // filter kernel size
                                      const uint16_t padding,   // padding size
                                      const uint16_t stride,    // stride
                                      const q7_t * bias,    // bias (read-only)
                                      const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out,   // bias shift, output shift, output image
                                      const uint16_t dim_im_out,    // output image dimension
                                      q15_t * bufferA,  // buffer space for input
                                      q7_t * bufferB    // buffer space for output
        );

    /*
     * Prototype of the reference q7 convolution with independent x and y
     * values for input size, kernel size, padding, stride and output size.
     * Only the declaration is visible in this header.
     *
     * bias_shift and out_shift carry no comment on their own line; sibling
     * prototypes in this header describe them as the amount of left-shift
     * for the bias and the amount of right-shift for the output.
     * Returns nothing; the result is delivered through Im_out.
     */
    void      arm_convolve_HWC_q7_ref_nonsquare(const q7_t * Im_in, // input image (read-only)
                                                const uint16_t dim_im_in_x, // input image dimension x
                                                const uint16_t dim_im_in_y, // input image dimension y
                                                const uint16_t ch_im_in,    // number of input image channels
                                                const q7_t * wt,    // kernel weights (read-only)
                                                const uint16_t ch_im_out,   // number of filters, i.e., output image channels
                                                const uint16_t dim_kernel_x,    // filter kernel size x
                                                const uint16_t dim_kernel_y,    // filter kernel size y
                                                const uint16_t padding_x,   // padding size x
                                                const uint16_t padding_y,   // padding size y
                                                const uint16_t stride_x,    // stride x
                                                const uint16_t stride_y,    // stride y
                                                const q7_t * bias,  // bias (read-only)
                                                const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out, // bias shift, output shift, output image
                                                const uint16_t dim_im_out_x,    // output image dimension x
                                                const uint16_t dim_im_out_y,    // output image dimension y
                                                q15_t * bufferA,    // buffer space for input
                                                q7_t * bufferB  // buffer space for output
        );

    /*
     * Prototype of the reference q15 convolution (HWC per the function name).
     * Input, weights, bias and output are all q15_t. This variant takes a
     * single value each for image dimension, kernel size, padding and stride.
     * Only the declaration is visible in this header.
     *
     * bias_shift and out_shift carry no comment on their own line; sibling
     * prototypes in this header describe them as the amount of left-shift
     * for the bias and the amount of right-shift for the output.
     * NOTE(review): bufferB is declared q7_t * even though the data path is
     * q15 -- confirm against the definition how (or whether) it is used.
     */
    void      arm_convolve_HWC_q15_ref(const q15_t * Im_in, // input image (read-only)
                                       const uint16_t dim_im_in,    // input image dimension
                                       const uint16_t ch_im_in, // number of input image channels
                                       const q15_t * wt,    // kernel weights (read-only)
                                       const uint16_t ch_im_out,    // number of filters, i.e., output image channels
                                       const uint16_t dim_kernel,   // filter kernel size
                                       const uint16_t padding,  // padding size
                                       const uint16_t stride,   // stride
                                       const q15_t * bias,  // bias (read-only)
                                       const uint16_t bias_shift, const uint16_t out_shift, q15_t * Im_out, // bias shift, output shift, output image
                                       const uint16_t dim_im_out,   // output image dimension
                                       q15_t * bufferA, // buffer space for input
                                       q7_t * bufferB   // buffer space for output
        );
    /*
     * Prototype of the reference q15 convolution with independent x and y
     * values for input size, kernel size, padding, stride and output size.
     * Only the declaration is visible in this header.
     *
     * The parameter notes below are taken from the identically named
     * parameters of the other convolution prototypes in this header; the
     * original declaration carried no comments -- TODO confirm against the
     * definition.
     * Note the name ends in "_nonsquare_ref" whereas the q7 prototypes use
     * "_ref_nonsquare".
     */
    void      arm_convolve_HWC_q15_nonsquare_ref(const q15_t * Im_in,   // input image (read-only)
                                                      const uint16_t dim_im_in_x,   // input image dimension x
                                                      const uint16_t dim_im_in_y,   // input image dimension y
                                                      const uint16_t ch_im_in,  // number of input image channels
                                                      const q15_t * wt, // kernel weights (read-only)
                                                      const uint16_t ch_im_out, // number of output image channels
                                                      const uint16_t dim_kernel_x,  // filter kernel size x
                                                      const uint16_t dim_kernel_y,  // filter kernel size y
                                                      const uint16_t padding_x, // padding size x
                                                      const uint16_t padding_y, // padding size y
                                                      const uint16_t stride_x,  // stride x
                                                      const uint16_t stride_y,  // stride y
                                                      const q15_t * bias,   // bias (read-only)
                                                      const uint16_t bias_shift,    // presumably amount of left-shift for bias
                                                      const uint16_t out_shift, // presumably amount of right-shift for output
                                                      q15_t * Im_out,   // output image
                                                      const uint16_t dim_im_out_x,  // output image dimension x
                                                      const uint16_t dim_im_out_y,  // output image dimension y
                                                      q15_t * bufferA,  // presumably buffer space for input
                                                      q7_t * bufferB);  // presumably buffer space for output
													  
    /*
     * Prototype of the reference q7 depthwise separable convolution (per the
     * function name). This variant takes a single value each for image
     * dimension, kernel size, padding and stride. Only the declaration is
     * visible in this header.
     * Returns nothing; the result is delivered through Im_out.
     */
    void      arm_depthwise_separable_conv_HWC_q7_ref(const q7_t * Im_in,   // input image (read-only)
                                                      const uint16_t dim_im_in, // input image dimension
                                                      const uint16_t ch_im_in,  // number of input image channels
                                                      const q7_t * wt,  // kernel weights (read-only)
                                                      const uint16_t ch_im_out, // number of filters, i.e., output image channels
                                                      const uint16_t dim_kernel,    // filter kernel size
                                                      const uint16_t padding,   // padding size
                                                      const uint16_t stride,    // stride
                                                      const q7_t * bias,    // bias (read-only)
                                                      const uint16_t bias_shift,    // amount of left-shift for bias
                                                      const uint16_t out_shift, // amount of right-shift for output
                                                      q7_t * Im_out,    // output image
                                                      const uint16_t dim_im_out,    // output image dimension
                                                      q15_t * bufferA,  // buffer space for input
                                                      q7_t * bufferB    // buffer space for output
        );
    /*
     * Prototype of the reference q7 depthwise separable convolution (per the
     * function name) with independent x and y values for input size, kernel
     * size, padding, stride and output size. Only the declaration is visible
     * in this header.
     * Returns nothing; the result is delivered through Im_out.
     */
    void      arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(const q7_t * Im_in, // input image (read-only)
                                                                const uint16_t dim_im_in_x, // input image dimension x
                                                                const uint16_t dim_im_in_y, // input image dimension y
                                                                const uint16_t ch_im_in,    // number of input image channels
                                                                const q7_t * wt,    // kernel weights (read-only)
                                                                const uint16_t ch_im_out,   // number of filters, i.e., output image channels
                                                                const uint16_t dim_kernel_x,    // filter kernel size x
                                                                const uint16_t dim_kernel_y,    // filter kernel size y
                                                                const uint16_t padding_x,   // padding size x
                                                                const uint16_t padding_y,   // padding size y
                                                                const uint16_t stride_x,    // stride x
                                                                const uint16_t stride_y,    // stride y
                                                                const q7_t * bias,  // bias (read-only)
                                                                const uint16_t bias_shift,  // amount of left-shift for bias
                                                                const uint16_t out_shift,   // amount of right-shift for output
                                                                q7_t * Im_out,  // output image
                                                                const uint16_t dim_im_out_x,    // output image dimension x
                                                                const uint16_t dim_im_out_y,    // output image dimension y
                                                                q15_t * bufferA,    // buffer space for input
                                                                q7_t * bufferB  // buffer space for output
        );

/*
 *
 * Fully-connected reference implementation
 *
 */

    /*
     * Prototype of the reference q7 fully-connected layer: q7 vector, q7
     * matrix, q7 bias and q7 output. Only the declaration is visible in this
     * header.
     *
     * NOTE(review): the original comment on num_of_rows reads "numCol of A",
     * which disagrees with the parameter name -- confirm which matrix
     * dimension it is against the definition.
     * vec_buffer had no comment; presumably a scratch buffer -- TODO confirm.
     */
    void      arm_fully_connected_q7_ref(const q7_t * pV,   // pointer to vector (read-only)
                                         const q7_t * pM,   // pointer to matrix (read-only)
                                         const uint16_t dim_vec,    // length of the vector
                                         const uint16_t num_of_rows,    // "numCol of A" in the original note -- see review note above
                                         const uint16_t bias_shift, // amount of left-shift for bias
                                         const uint16_t out_shift,  // amount of right-shift for output
                                         const q7_t * bias, q7_t * pOut,    // bias (read-only), output operand
                                         q15_t * vec_buffer);

    /*
     * Prototype of the reference q15 fully-connected layer: q15 vector, q15
     * matrix, q15 bias and q15 output. Only the declaration is visible in
     * this header.
     *
     * NOTE(review): the original comment on num_of_rows reads "numCol of A",
     * which disagrees with the parameter name -- confirm which matrix
     * dimension it is against the definition.
     * vec_buffer had no comment; presumably a scratch buffer -- TODO confirm.
     */
    void      arm_fully_connected_q15_ref(const q15_t * pV, // pointer to vector (read-only)
                                          const q15_t * pM, // pointer to matrix (read-only)
                                          const uint16_t dim_vec,   // length of the vector
                                          const uint16_t num_of_rows,   // "numCol of A" in the original note -- see review note above
                                          const uint16_t bias_shift,    // amount of left-shift for bias
                                          const uint16_t out_shift, // amount of right-shift for output
                                          const q15_t * bias, q15_t * pOut, // bias (read-only), output operand
                                          q15_t * vec_buffer);

    /*
     * Prototype of the reference mixed-precision fully-connected layer: q15
     * vector, q7 matrix, q7 bias and q15 output. Only the declaration is
     * visible in this header.
     *
     * NOTE(review): the original comment on num_of_rows reads "numCol of A",
     * which disagrees with the parameter name -- confirm which matrix
     * dimension it is against the definition.
     * vec_buffer had no comment; presumably a scratch buffer -- TODO confirm.
     */
    void      arm_fully_connected_mat_q7_vec_q15_ref(const q15_t * pV,  // pointer to vector (read-only)
                                                     const q7_t * pM,   // pointer to matrix (read-only)
                                                     const uint16_t dim_vec,    // length of the vector
                                                     const uint16_t num_of_rows,    // "numCol of A" in the original note -- see review note above
                                                     const uint16_t bias_shift, // amount of left-shift for bias
                                                     const uint16_t out_shift,  // amount of right-shift for output
                                                     const q7_t * bias, q15_t * pOut,   // bias (read-only), output operand
                                                     q15_t * vec_buffer);

    /*
     * Prototype of the reference for the "_opt" q7 fully-connected layer.
     * The parameter list is identical to arm_fully_connected_q7_ref.
     * NOTE(review): what distinguishes the "_opt" reference (e.g. a different
     * expected weight ordering) is not visible here -- confirm against the
     * definition.
     *
     * NOTE(review): the original comment on num_of_rows reads "numCol of A",
     * which disagrees with the parameter name.
     * vec_buffer had no comment; presumably a scratch buffer -- TODO confirm.
     */
    void      arm_fully_connected_q7_opt_ref(const q7_t * pV,   // pointer to vector (read-only)
                                             const q7_t * pM,   // pointer to matrix (read-only)
                                             const uint16_t dim_vec,    // length of the vector
                                             const uint16_t num_of_rows,    // "numCol of A" in the original note -- see review note above
                                             const uint16_t bias_shift, // amount of left-shift for bias
                                             const uint16_t out_shift,  // amount of right-shift for output
                                             const q7_t * bias, q7_t * pOut,    // bias (read-only), output operand
                                             q15_t * vec_buffer);

    /*
     * Prototype of the reference for the "_opt" q15 fully-connected layer.
     * The parameter list is identical to arm_fully_connected_q15_ref.
     * NOTE(review): what distinguishes the "_opt" reference (e.g. a different
     * expected weight ordering) is not visible here -- confirm against the
     * definition.
     *
     * NOTE(review): the original comment on num_of_rows reads "numCol of A",
     * which disagrees with the parameter name.
     * vec_buffer had no comment; presumably a scratch buffer -- TODO confirm.
     */
    void      arm_fully_connected_q15_opt_ref(const q15_t * pV, // pointer to vector (read-only)
                                              const q15_t * pM, // pointer to matrix (read-only)
                                              const uint16_t dim_vec,   // length of the vector
                                              const uint16_t num_of_rows,   // "numCol of A" in the original note -- see review note above
                                              const uint16_t bias_shift,    // amount of left-shift for bias
                                              const uint16_t out_shift, // amount of right-shift for output
                                              const q15_t * bias, q15_t * pOut, // bias (read-only), output operand
                                              q15_t * vec_buffer);

    /*
     * Prototype of the reference for the "_opt" mixed-precision
     * fully-connected layer: q15 vector, q7 matrix, q7 bias and q15 output.
     * The parameter list is identical to
     * arm_fully_connected_mat_q7_vec_q15_ref.
     * NOTE(review): what distinguishes the "_opt" reference (e.g. a different
     * expected weight ordering) is not visible here -- confirm against the
     * definition.
     *
     * NOTE(review): the original comment on num_of_rows reads "numCol of A",
     * which disagrees with the parameter name.
     * vec_buffer had no comment; presumably a scratch buffer -- TODO confirm.
     */
    void      arm_fully_connected_mat_q7_vec_q15_opt_ref(const q15_t * pV,  // pointer to vector (read-only)
                                                         const q7_t * pM,   // pointer to matrix (read-only)
                                                         const uint16_t dim_vec,    // length of the vector
                                                         const uint16_t num_of_rows,    // "numCol of A" in the original note -- see review note above
                                                         const uint16_t bias_shift, // amount of left-shift for bias
                                                         const uint16_t out_shift,  // amount of right-shift for output
                                                         const q7_t * bias, q15_t * pOut,   // bias (read-only), output operand
                                                         q15_t * vec_buffer);

/*
 *
 * Pooling reference implementation
 *
 */

    /*
     * Prototype of the reference q7 average pooling (per the function name).
     * A single value is used for each of image dimension, window size,
     * padding and stride. Only the declaration is visible in this header.
     * Im_out had no comment; by name and type it is the q7 output image.
     */
    void      arm_avepool_q7_HWC_ref(const q7_t * Im_in,    // input image (read-only)
                                     const uint16_t dim_im_in,  // input image dimension
                                     const uint16_t ch_im_in,   // number of input image channels
                                     const uint16_t dim_kernel, // window kernel size
                                     const uint16_t padding,    // padding size
                                     const uint16_t stride, // stride
                                     const uint16_t dim_im_out, // output image dimension
                                     q7_t * bufferA,    // a buffer for local storage
                                     q7_t * Im_out);    // output image (by name)

    /*
     * Prototype of the reference q7 max pooling (per the function name).
     * A single value is used for each of image dimension, window size,
     * padding and stride. Only the declaration is visible in this header.
     * Im_out had no comment; by name and type it is the q7 output image.
     */
    void      arm_maxpool_q7_HWC_ref(const q7_t * Im_in,    // input image (read-only)
                                     const uint16_t dim_im_in,  // input image dimension
                                     const uint16_t ch_im_in,   // number of input image channels
                                     const uint16_t dim_kernel, // window kernel size
                                     const uint16_t padding,    // padding size
                                     const uint16_t stride, // stride
                                     const uint16_t dim_im_out, // output image dimension
                                     q7_t * bufferA,    // a buffer for local storage
                                     q7_t * Im_out);    // output image (by name)

/*
 *
 * Other reference implementation
 *
 */

    /*
     * Prototype of the reference q7 ReLU (per the function name).
     * data is a non-const pointer, so it is presumably modified in place;
     * size is presumably the number of elements -- TODO confirm both against
     * the definition.
     */
    void      arm_relu_q7_ref(q7_t * data, uint16_t size);

    /*
     * Prototype of the reference q15 ReLU (per the function name).
     * data is a non-const pointer, so it is presumably modified in place;
     * size is presumably the number of elements -- TODO confirm both against
     * the definition.
     */
    void      arm_relu_q15_ref(q15_t * data, uint16_t size);

    /*
     * Prototype of the reference q7 multiplication (per the function name).
     * Presumably multiplies pSrcA by pSrcB element by element over blockSize
     * elements, applies a right shift of out_shift and writes to pDst --
     * TODO confirm against the definition.
     * NOTE(review): pSrcA and pSrcB are not const-qualified; whether they are
     * written is not visible from this declaration.
     */
    void      arm_nn_mult_q7_ref(q7_t * pSrcA, q7_t * pSrcB, q7_t * pDst, const uint16_t out_shift, uint32_t blockSize);

    /*
     * Prototype of the reference q15 multiplication (per the function name).
     * Presumably multiplies pSrcA by pSrcB element by element over blockSize
     * elements, applies a right shift of out_shift and writes to pDst --
     * TODO confirm against the definition.
     * NOTE(review): pSrcA and pSrcB are not const-qualified; whether they are
     * written is not visible from this declaration.
     */
    void      arm_nn_mult_q15_ref(q15_t * pSrcA, q15_t * pSrcB, q15_t * pDst, const uint16_t out_shift, uint32_t blockSize);

#ifdef __cplusplus
}
#endif

#endif