From 9f95ff5b6ba01db09552b84a0ab79607060a2666 Mon Sep 17 00:00:00 2001 From: Ali Labbene Date: Wed, 11 Dec 2019 08:59:21 +0100 Subject: Official ARM version: v5.4.0 Add CMSIS V5.4.0, please refer to index.html available under \docs folder. Note: content of \CMSIS\Core\Include has been copied under \Include to keep the same structure used in existing projects, and thus avoid projects mass update Note: the following components have been removed from ARM original delivery (as not used in ST packages) - CMSIS_EW2018.pdf - .gitattributes - .gitignore - \Device - \CMSIS - \CoreValidation - \DAP - \Documentation - \DoxyGen - \Driver - \Pack - \RTOS\CMSIS_RTOS_Tutorial.pdf - \RTOS\RTX - \RTOS\Template - \RTOS2\RTX - \Utilities - All ARM/GCC projects files are deleted from \DSP, \RTOS and \RTOS2 Change-Id: Ia026c3f0f0d016627a4fb5a9032852c33d24b4d3 --- docs/NN/html/group__NNConv.html | 1796 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 1796 insertions(+) create mode 100644 docs/NN/html/group__NNConv.html (limited to 'docs/NN/html/group__NNConv.html') diff --git a/docs/NN/html/group__NNConv.html b/docs/NN/html/group__NNConv.html new file mode 100644 index 0000000..b7f1ff6 --- /dev/null +++ b/docs/NN/html/group__NNConv.html @@ -0,0 +1,1796 @@ + + + + + +Neural Network Convolution Functions +CMSIS-NN: Neural Network Convolution Functions + + + + + + + + + + + + + + +
+
+ + + + + + + +
+
CMSIS-NN +  Version 1.1.0 +
+
CMSIS NN Software Library
+
+
+ +
+
    + +
+
+ + + +
+
+ +
+
+
+ +
+ + + + +
+ +
+ +
+ +
+
Neural Network Convolution Functions
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Functions

arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Fast Q7 version of 1x1 convolution (non-sqaure shape) More...
 
arm_status arm_convolve_HWC_q15_basic (const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Basic Q15 convolution function. More...
 
arm_status arm_convolve_HWC_q15_fast (const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Fast Q15 convolution function. More...
 
arm_status arm_convolve_HWC_q15_fast_nonsquare (const q15_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Fast Q15 convolution function (non-sqaure shape) More...
 
arm_status arm_convolve_HWC_q7_basic (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Basic Q7 convolution function. More...
 
arm_status arm_convolve_HWC_q7_basic_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Basic Q7 convolution function (non-sqaure shape) More...
 
arm_status arm_convolve_HWC_q7_fast (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Fast Q7 convolution function. More...
 
arm_status arm_convolve_HWC_q7_fast_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Fast Q7 convolution function (non-sqaure shape) More...
 
arm_status arm_convolve_HWC_q7_RGB (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Q7 convolution function for RGB image. More...
 
arm_status arm_depthwise_separable_conv_HWC_q7 (const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
 Q7 depthwise separable convolution function. More...
 
arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare (const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
 Q7 depthwise separable convolution function (non-square shape) More...
 
+

Description

+

Perform convolution layer

+

The convolution is implemented in 2 steps: im2col and GEMM

+

im2col is a process of converting each patch of image data into a column. After im2col, the convolution is computed as matrix-matrix multiplication.

+

To reduce the memory footprint, the im2col is performed partially. Each iteration, only a few column (i.e., patches) are generated and computed with GEMM kernels similar to CMSIS-DSP arm_mat_mult functions.

+

Function Documentation

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare (const q7_t * Im_in,
const uint16_t dim_im_in_x,
const uint16_t dim_im_in_y,
const uint16_t ch_im_in,
const q7_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel_x,
const uint16_t dim_kernel_y,
const uint16_t padding_x,
const uint16_t padding_y,
const uint16_t stride_x,
const uint16_t stride_y,
const q7_t * bias,
const uint16_t bias_shift,
const uint16_t out_shift,
q7_t * Im_out,
const uint16_t dim_im_out_x,
const uint16_t dim_im_out_y,
q15_t * bufferA,
q7_t * bufferB 
)
+
+
Parameters
+ + + + + + + + + + + + + + + + + + + + + +
[in]Im_inpointer to input tensor
[in]dim_im_in_xinput tensor dimention x
[in]dim_im_in_yinput tensor dimention y
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernel_xfilter kernel size x
[in]dim_kernel_yfilter kernel size y
[in]padding_xpadding size x
[in]padding_ypadding size y
[in]stride_xconvolution stride x
[in]stride_yconvolution stride y
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_out_xoutput tensor dimension x
[in]dim_im_out_youtput tensor dimension y
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
+
+
+
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.
+

This function is optimized for convolution with 1x1 kernel size (i.e., dim_kernel_x=1 and dim_kernel_y=1). It can be used for the second half of MobileNets [1] after depthwise separable convolution.

+

This function is the version with full list of optimization tricks, but with some contraints: ch_im_in is multiple of 4 ch_im_out is multiple of 2

+

[1] MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications https://arxiv.org/abs/1704.04861

+ +

References arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_convolve_HWC_q15_basic (const q15_t * Im_in,
const uint16_t dim_im_in,
const uint16_t ch_im_in,
const q15_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel,
const uint16_t padding,
const uint16_t stride,
const q15_t * bias,
const uint16_t bias_shift,
const uint16_t out_shift,
q15_t * Im_out,
const uint16_t dim_im_out,
q15_t * bufferA,
q7_t * bufferB 
)
+
+
Parameters
+ + + + + + + + + + + + + + + + +
[in]Im_inpointer to input tensor
[in]dim_im_ininput tensor dimention
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernelfilter kernel size
[in]paddingpadding sizes
[in]strideconvolution stride
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_outoutput tensor dimension
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
+
+
+
Returns
The function returns ARM_MATH_SUCCESS
+

Buffer size:

+

bufferA size: ch_im_in*dim_kernel*dim_kernel

+

bufferB size: 0

+

This basic version is designed to work for any input tensor and weight dimension.

+ +

References NN_ROUND.

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_convolve_HWC_q15_fast (const q15_t * Im_in,
const uint16_t dim_im_in,
const uint16_t ch_im_in,
const q15_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel,
const uint16_t padding,
const uint16_t stride,
const q15_t * bias,
const uint16_t bias_shift,
const uint16_t out_shift,
q15_t * Im_out,
const uint16_t dim_im_out,
q15_t * bufferA,
q7_t * bufferB 
)
+
+
Parameters
+ + + + + + + + + + + + + + + + +
[in]Im_inpointer to input tensor
[in]dim_im_ininput tensor dimention
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernelfilter kernel size
[in]paddingpadding sizes
[in]strideconvolution stride
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_outoutput tensor dimension
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
+
+
+
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.
+

Buffer size:

+

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

+

bufferB size: 0

+

Input dimension constraints:

+

ch_im_in is multiple of 2

+

ch_im_out is multipe of 2

+ +

References NN_ROUND.

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_convolve_HWC_q15_fast_nonsquare (const q15_t * Im_in,
const uint16_t dim_im_in_x,
const uint16_t dim_im_in_y,
const uint16_t ch_im_in,
const q15_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel_x,
const uint16_t dim_kernel_y,
const uint16_t padding_x,
const uint16_t padding_y,
const uint16_t stride_x,
const uint16_t stride_y,
const q15_t * bias,
const uint16_t bias_shift,
const uint16_t out_shift,
q15_t * Im_out,
const uint16_t dim_im_out_x,
const uint16_t dim_im_out_y,
q15_t * bufferA,
q7_t * bufferB 
)
+
+
Parameters
+ + + + + + + + + + + + + + + + + + + + + +
[in]Im_inpointer to input tensor
[in]dim_im_in_xinput tensor dimention x
[in]dim_im_in_yinput tensor dimention y
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernel_xfilter kernel size x
[in]dim_kernel_yfilter kernel size y
[in]padding_xpadding size x
[in]padding_ypadding size y
[in]stride_xconvolution stride x
[in]stride_yconvolution stride y
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_out_xoutput tensor dimension x
[in]dim_im_out_youtput tensor dimension y
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
+
+
+
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.
+

Buffer size:

+

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

+

bufferB size: 0

+

Input dimension constraints:

+

ch_im_in is multiple of 2

+

ch_im_out is multipe of 2

+ +

References NN_ROUND.

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_convolve_HWC_q7_basic (const q7_t * Im_in,
const uint16_t dim_im_in,
const uint16_t ch_im_in,
const q7_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel,
const uint16_t padding,
const uint16_t stride,
const q7_t * bias,
const uint16_t bias_shift,
const uint16_t out_shift,
q7_t * Im_out,
const uint16_t dim_im_out,
q15_t * bufferA,
q7_t * bufferB 
)
+
+
Parameters
+ + + + + + + + + + + + + + + + +
[in]Im_inpointer to input tensor
[in]dim_im_ininput tensor dimention
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernelfilter kernel size
[in]paddingpadding sizes
[in]strideconvolution stride
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_outoutput tensor dimension
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
+
+
+
Returns
The function returns ARM_MATH_SUCCESS
+

Buffer size:

+

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

+

bufferB size: 0

+

This basic version is designed to work for any input tensor and weight dimension.

+ +

References arm_nn_mat_mult_kernel_q7_q15(), arm_q7_to_q15_no_shift(), and NN_ROUND.

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_convolve_HWC_q7_basic_nonsquare (const q7_t * Im_in,
const uint16_t dim_im_in_x,
const uint16_t dim_im_in_y,
const uint16_t ch_im_in,
const q7_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel_x,
const uint16_t dim_kernel_y,
const uint16_t padding_x,
const uint16_t padding_y,
const uint16_t stride_x,
const uint16_t stride_y,
const q7_t * bias,
const uint16_t bias_shift,
const uint16_t out_shift,
q7_t * Im_out,
const uint16_t dim_im_out_x,
const uint16_t dim_im_out_y,
q15_t * bufferA,
q7_t * bufferB 
)
+
+
Parameters
+ + + + + + + + + + + + + + + + + + + + + +
[in]Im_inpointer to input tensor
[in]dim_im_in_xinput tensor dimention x
[in]dim_im_in_yinput tensor dimention y
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernel_xfilter kernel size x
[in]dim_kernel_yfilter kernel size y
[in]padding_xpadding size x
[in]padding_ypadding size y
[in]stride_xconvolution stride x
[in]stride_yconvolution stride y
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_out_xoutput tensor dimension x
[in]dim_im_out_youtput tensor dimension y
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
+
+
+
Returns
The function returns ARM_MATH_SUCCESS
+ +

References arm_nn_mat_mult_kernel_q7_q15(), arm_q7_to_q15_no_shift(), and NN_ROUND.

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_convolve_HWC_q7_fast (const q7_t * Im_in,
const uint16_t dim_im_in,
const uint16_t ch_im_in,
const q7_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel,
const uint16_t padding,
const uint16_t stride,
const q7_t * bias,
const uint16_t bias_shift,
const uint16_t out_shift,
q7_t * Im_out,
const uint16_t dim_im_out,
q15_t * bufferA,
q7_t * bufferB 
)
+
+
Parameters
+ + + + + + + + + + + + + + + + +
[in]Im_inpointer to input tensor
[in]dim_im_ininput tensor dimention
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernelfilter kernel size
[in]paddingpadding sizes
[in]strideconvolution stride
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_outoutput tensor dimension
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
+
+
+
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.
+

Buffer size:

+

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

+

bufferB size: 0

+

Input dimension constraints:

+

ch_im_in is multiple of 4 ( because of the SIMD32 read and swap )

+

ch_im_out is multipe of 2 ( bacause 2x2 mat_mult kernel )

+

The im2col converts the Q7 tensor input into Q15 column, which is stored in bufferA. There is reordering happenning during this im2col process with arm_q7_to_q15_reordered_no_shift. For every four elements, the second and third elements are swapped.

+

The computation kernel arm_nn_mat_mult_kernel_q7_q15_reordered does the GEMM computation with the reordered columns.

+

To speed-up the determination of the padding condition, we split the computation into 3x3 parts, i.e., {top, mid, bottom} X {left, mid, right}. This reduces the total number of boundary condition checks and improves the data copying performance.

+ +

References arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.

+ +

Referenced by main().

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_convolve_HWC_q7_fast_nonsquare (const q7_t * Im_in,
const uint16_t dim_im_in_x,
const uint16_t dim_im_in_y,
const uint16_t ch_im_in,
const q7_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel_x,
const uint16_t dim_kernel_y,
const uint16_t padding_x,
const uint16_t padding_y,
const uint16_t stride_x,
const uint16_t stride_y,
const q7_t * bias,
const uint16_t bias_shift,
const uint16_t out_shift,
q7_t * Im_out,
const uint16_t dim_im_out_x,
const uint16_t dim_im_out_y,
q15_t * bufferA,
q7_t * bufferB 
)
+
+
Parameters
+ + + + + + + + + + + + + + + + + + + + + +
[in]Im_inpointer to input tensor
[in]dim_im_in_xinput tensor dimention x
[in]dim_im_in_yinput tensor dimention y
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernel_xfilter kernel size x
[in]dim_kernel_yfilter kernel size y
[in]padding_xpadding size x
[in]padding_ypadding size y
[in]stride_xconvolution stride x
[in]stride_yconvolution stride y
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_out_xoutput tensor dimension x
[in]dim_im_out_youtput tensor dimension y
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
+
+
+
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.
+

This function is the version with full list of optimization tricks, but with some contraints: ch_im_in is multiple of 4 ch_im_out is multiple of 2

+ +

References arm_nn_mat_mult_kernel_q7_q15_reordered(), arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_convolve_HWC_q7_RGB (const q7_t * Im_in,
const uint16_t dim_im_in,
const uint16_t ch_im_in,
const q7_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel,
const uint16_t padding,
const uint16_t stride,
const q7_t * bias,
const uint16_t bias_shift,
const uint16_t out_shift,
q7_t * Im_out,
const uint16_t dim_im_out,
q15_t * bufferA,
q7_t * bufferB 
)
+
+

Q7 version of convolution for RGB image.

+
Parameters
+ + + + + + + + + + + + + + + + +
[in]Im_inpointer to input tensor
[in]dim_im_ininput tensor dimention
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernelfilter kernel size
[in]paddingpadding sizes
[in]strideconvolution stride
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_outoutput tensor dimension
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
+
+
+
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.
+

Buffer size:

+

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

+

bufferB size: 0

+

Input dimension constraints:

+

ch_im_in equals 3

+

This kernel is written exclusively for convolution with ch_im_in equals 3. This applies on the first layer of CNNs which has input image with RGB format.

+ +

References arm_nn_mat_mult_kernel_q7_q15(), arm_nnword::half_words, NN_ROUND, and arm_nnword::word.

+ +

Referenced by main().

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_depthwise_separable_conv_HWC_q7 (const q7_t * Im_in,
const uint16_t dim_im_in,
const uint16_t ch_im_in,
const q7_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel,
const uint16_t padding,
const uint16_t stride,
const q7_t * bias,
const uint16_t bias_shift,
const uint16_t out_shift,
q7_t * Im_out,
const uint16_t dim_im_out,
q15_t * bufferA,
q7_t * bufferB 
)
+
+
Parameters
+ + + + + + + + + + + + + + + + +
[in]Im_inpointer to input tensor
[in]dim_im_ininput tensor dimention
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernelfilter kernel size
[in]paddingpadding sizes
[in]strideconvolution stride
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_outoutput tensor dimension
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
+
+
+
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.
+

Buffer size:

+

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

+

bufferB size: 0

+

Input dimension constraints:

+

ch_im_in equals ch_im_out

+

Implementation: There are 3 nested loop here: Inner loop: calculate each output value with MAC instruction over an accumulator Mid loop: loop over different output channel Outer loop: loop over different output (x, y)

+ +

References arm_nnword::bytes, NN_ROUND, and arm_nnword::word.

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare (const q7_t * Im_in,
const uint16_t dim_im_in_x,
const uint16_t dim_im_in_y,
const uint16_t ch_im_in,
const q7_t * wt,
const uint16_t ch_im_out,
const uint16_t dim_kernel_x,
const uint16_t dim_kernel_y,
const uint16_t padding_x,
const uint16_t padding_y,
const uint16_t stride_x,
const uint16_t stride_y,
const q7_t * bias,
const uint16_t bias_shift,
const uint16_t out_shift,
q7_t * Im_out,
const uint16_t dim_im_out_x,
const uint16_t dim_im_out_y,
q15_t * bufferA,
q7_t * bufferB 
)
+
+
Parameters
+ + + + + + + + + + + + + + + + + + + + + +
[in]Im_inpointer to input tensor
[in]dim_im_in_xinput tensor dimention x
[in]dim_im_in_yinput tensor dimention y
[in]ch_im_innumber of input tensor channels
[in]wtpointer to kernel weights
[in]ch_im_outnumber of filters, i.e., output tensor channels
[in]dim_kernel_xfilter kernel size x
[in]dim_kernel_yfilter kernel size y
[in]padding_xpadding sizes x
[in]padding_ypadding sizes y
[in]stride_xconvolution stride x
[in]stride_yconvolution stride y
[in]biaspointer to bias
[in]bias_shiftamount of left-shift for bias
[in]out_shiftamount of right-shift for output
[in,out]Im_outpointer to output tensor
[in]dim_im_out_xoutput tensor dimension x
[in]dim_im_out_youtput tensor dimension y
[in,out]bufferApointer to buffer space for input
[in,out]bufferBpointer to buffer space for output
+
+
+
Returns
The function returns either ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking.
+

This function is the version with full list of optimization tricks, but with some contraints: ch_im_in is multiple of 2 ch_im_out is multiple of 2

+ +

References arm_nnword::bytes, NN_ROUND, and arm_nnword::word.

+ +
+
+
+
+ + + + -- cgit