From 96d6da4e252b06dcfdc041e7df23e86161c33007 Mon Sep 17 00:00:00 2001 From: rihab kouki Date: Tue, 28 Jul 2020 11:24:49 +0100 Subject: Official ARM version: v5.6.0 --- NN/Include/arm_nnfunctions.h | 205 ++++++++++++++++++++++++++++--------------- 1 file changed, 135 insertions(+), 70 deletions(-) (limited to 'NN/Include/arm_nnfunctions.h') diff --git a/NN/Include/arm_nnfunctions.h b/NN/Include/arm_nnfunctions.h index c6ec83a..331255b 100644 --- a/NN/Include/arm_nnfunctions.h +++ b/NN/Include/arm_nnfunctions.h @@ -34,7 +34,7 @@ * ------------ * * This user manual describes the CMSIS NN software library, - * a collection of efficient neural network kernels developed to maximize the + * a collection of efficient neural network kernels developed to maximize the * performance and minimize the memory footprint of neural networks on Cortex-M processor cores. * * The library is divided into a number of functions each covering a specific category: @@ -47,8 +47,8 @@ * * The library has separate functions for operating on different weight and activation data * types including 8-bit integers (q7_t) and 16-bit integers (q15_t). The descrition of the - * kernels are included in the function description. The implementation details are also - * described in this paper [1]. + * kernels are included in the function description. The implementation details are also + * described in this paper [1]. * * Block Diagram * -------- @@ -86,7 +86,7 @@ /** * @defgroup groupNN Neural Network Functions - * These functions perform basic operations for neural network layers. + * These functions perform basic operations for neural network layers. */ #ifndef _ARM_NNFUNCTIONS_H @@ -111,12 +111,12 @@ extern "C" * * The convolution is implemented in 2 steps: im2col and GEMM * - * im2col is a process of converting each patch of image data into + * im2col is a process of converting each patch of image data into * a column. After im2col, the convolution is computed as matrix-matrix * multiplication. 
- * + * * To reduce the memory footprint, the im2col is performed partially. - * Each iteration, only a few column (i.e., patches) are generated and + * Each iteration, only a few column (i.e., patches) are generated and * computed with GEMM kernels similar to CMSIS-DSP arm_mat_mult functions. * */ @@ -136,9 +136,9 @@ extern "C" * @param[in] out_shift amount of right-shift for output * @param[in,out] Im_out pointer to output tensor * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] bufferA pointer to buffer space for input * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS + * @return The function returns ARM_MATH_SUCCESS * */ @@ -153,9 +153,9 @@ extern "C" const q7_t * bias, const uint16_t bias_shift, const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, + q7_t * Im_out, + const uint16_t dim_im_out, + q15_t * bufferA, q7_t * bufferB); /** @@ -180,7 +180,7 @@ extern "C" * @param[in] dim_im_out_y output tensor dimension y * @param[in,out] bufferA pointer to buffer space for input * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS + * @return The function returns ARM_MATH_SUCCESS */ arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t * Im_in, @@ -219,9 +219,9 @@ extern "C" * @param[in] out_shift amount of right-shift for output * @param[in,out] Im_out pointer to output tensor * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] bufferA pointer to buffer space for input * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS + * @return The function returns ARM_MATH_SUCCESS * */ @@ -236,9 +236,9 @@ extern "C" const q15_t * bias, const uint16_t bias_shift, const uint16_t out_shift, - q15_t * Im_out, - const 
uint16_t dim_im_out, - q15_t * bufferA, + q15_t * Im_out, + const uint16_t dim_im_out, + q15_t * bufferA, q7_t * bufferB); /** @@ -256,7 +256,7 @@ extern "C" * @param[in] out_shift amount of right-shift for output * @param[in,out] Im_out pointer to output tensor * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] bufferA pointer to buffer space for input * @param[in,out] bufferB pointer to buffer space for output * @return The function returns either * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. @@ -278,9 +278,9 @@ extern "C" const q7_t * bias, const uint16_t bias_shift, const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, + q7_t * Im_out, + const uint16_t dim_im_out, + q15_t * bufferA, q7_t * bufferB); /** @@ -303,7 +303,7 @@ extern "C" * @param[in,out] Im_out pointer to output tensor * @param[in] dim_im_out_x output tensor dimension x * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] bufferA pointer to buffer space for input * @param[in,out] bufferB pointer to buffer space for output * @return The function returns either * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. @@ -355,7 +355,7 @@ extern "C" * @param[in,out] Im_out pointer to output tensor * @param[in] dim_im_out_x output tensor dimension x * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] bufferA pointer to buffer space for input * @param[in,out] bufferB pointer to buffer space for output * @return The function returns either * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
@@ -405,7 +405,7 @@ extern "C" * @param[in] out_shift amount of right-shift for output * @param[in,out] Im_out pointer to output tensor * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] bufferA pointer to buffer space for input * @param[in,out] bufferB pointer to buffer space for output * @return The function returns either * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. @@ -426,9 +426,9 @@ extern "C" const q7_t * bias, const uint16_t bias_shift, const uint16_t out_shift, - q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, + q7_t * Im_out, + const uint16_t dim_im_out, + q15_t * bufferA, q7_t * bufferB); /** @@ -446,7 +446,7 @@ extern "C" * @param[in] out_shift amount of right-shift for output * @param[in,out] Im_out pointer to output tensor * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] bufferA pointer to buffer space for input * @param[in,out] bufferB pointer to buffer space for output * @return The function returns either * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. @@ -468,9 +468,9 @@ extern "C" const q15_t * bias, const uint16_t bias_shift, const uint16_t out_shift, - q15_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, + q15_t * Im_out, + const uint16_t dim_im_out, + q15_t * bufferA, q7_t * bufferB); /** @@ -493,7 +493,7 @@ extern "C" * @param[in,out] Im_out pointer to output tensor * @param[in] dim_im_out_x output tensor dimension x * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] bufferA pointer to buffer space for input * @param[in,out] bufferB pointer to buffer space for output * @return The function returns either * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. 
@@ -508,7 +508,7 @@ extern "C" * * Input dimension constraints: * - * ch_im_in is multiple of 2 + * ch_im_in is multiple of 2 * * ch_im_out is multipe of 2 * @@ -532,10 +532,10 @@ extern "C" const uint16_t out_shift, q15_t * Im_out, const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t * bufferA, + const uint16_t dim_im_out_y, + q15_t * bufferA, q7_t * bufferB); - + /** * @brief Q7 depthwise separable convolution function * @param[in] Im_in pointer to input tensor @@ -551,7 +551,7 @@ extern "C" * @param[in] out_shift amount of right-shift for output * @param[in,out] Im_out pointer to output tensor * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] bufferA pointer to buffer space for input * @param[in,out] bufferB pointer to buffer space for output * @return The function returns either * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. @@ -574,8 +574,8 @@ extern "C" const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out, - const uint16_t dim_im_out, - q15_t * bufferA, + const uint16_t dim_im_out, + q15_t * bufferA, q7_t * bufferB); /** @@ -598,7 +598,7 @@ extern "C" * @param[in,out] Im_out pointer to output tensor * @param[in] dim_im_out_x output tensor dimension x * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input + * @param[in,out] bufferA pointer to buffer space for input * @param[in,out] bufferB pointer to buffer space for output * @return The function returns either * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. @@ -642,7 +642,7 @@ extern "C" * * Here we have two types of kernel functions. The basic function * implements the function using regular GEMV approach. The opt functions - * operates with weights in interleaved formats. + * operates with weights in interleaved formats. 
* */ @@ -666,9 +666,9 @@ extern "C" const uint16_t dim_vec, const uint16_t num_of_rows, const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut, + const uint16_t out_shift, + const q7_t * bias, + q7_t * pOut, q15_t * vec_buffer); /** @@ -691,9 +691,9 @@ extern "C" const uint16_t dim_vec, const uint16_t num_of_rows, const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, - q7_t * pOut, + const uint16_t out_shift, + const q7_t * bias, + q7_t * pOut, q15_t * vec_buffer); /** @@ -716,9 +716,9 @@ extern "C" const uint16_t dim_vec, const uint16_t num_of_rows, const uint16_t bias_shift, - const uint16_t out_shift, - const q15_t * bias, - q15_t * pOut, + const uint16_t out_shift, + const q15_t * bias, + q15_t * pOut, q15_t * vec_buffer); /** @@ -742,8 +742,8 @@ extern "C" const uint16_t num_of_rows, const uint16_t bias_shift, const uint16_t out_shift, - const q15_t * bias, - q15_t * pOut, + const q15_t * bias, + q15_t * pOut, q15_t * vec_buffer); /** @@ -767,8 +767,8 @@ extern "C" const uint16_t num_of_rows, const uint16_t bias_shift, const uint16_t out_shift, - const q7_t * bias, - q15_t * pOut, + const q7_t * bias, + q15_t * pOut, q15_t * vec_buffer); /** @@ -792,16 +792,16 @@ extern "C" const uint16_t num_of_rows, const uint16_t bias_shift, const uint16_t out_shift, - const q7_t * bias, - q15_t * pOut, + const q7_t * bias, + q15_t * pOut, q15_t * vec_buffer); /** * @brief Matrix-Multiplication Kernels for Convolution * - * These functions are used within convolution layer functions for + * These functions are used within convolution layer functions for * matrix multiplication. - * + * * The implementation is similar to CMSIS-DSP arm_mat_mult functions * with one Q7 and one Q15 operands. The Q15 operand is the im2col * output which is always with 2 columns. 
@@ -826,8 +826,8 @@ extern "C" const uint16_t ch_im_out, const uint16_t numCol_A, const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, + const uint16_t out_shift, + const q7_t * bias, q7_t * pOut); /** @@ -848,8 +848,8 @@ extern "C" const uint16_t ch_im_out, const uint16_t numCol_A, const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t * bias, + const uint16_t out_shift, + const q7_t * bias, q7_t * pOut); #ifdef __cplusplus @@ -902,7 +902,7 @@ extern "C" * @return none. */ - void arm_nn_activations_direct_q7(q7_t * data, uint16_t size, uint16_t int_width, + void arm_nn_activations_direct_q7(q7_t * data, uint16_t size, uint16_t int_width, arm_nn_activation_type type); /** @@ -944,9 +944,9 @@ extern "C" const uint16_t ch_im_in, const uint16_t dim_kernel, const uint16_t padding, - const uint16_t stride, - const uint16_t dim_im_out, - q7_t * bufferA, + const uint16_t stride, + const uint16_t dim_im_out, + q7_t * bufferA, q7_t * Im_out); /** @@ -969,9 +969,9 @@ extern "C" const uint16_t ch_im_in, const uint16_t dim_kernel, const uint16_t padding, - const uint16_t stride, - const uint16_t dim_im_out, - q7_t * bufferA, + const uint16_t stride, + const uint16_t dim_im_out, + q7_t * bufferA, q7_t * Im_out); /** @@ -1003,6 +1003,71 @@ extern "C" void arm_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out); + /** + * @brief uint8 depthwise convolution function with asymmetric quantization for even number of channel multiplier + * and input channels. Unless specified otherwise, arguments are mandatory. 
+ * + * @param[in] input Pointer to input tensor + * @param[in] input_x Width of input tensor + * @param[in] input_y Height of input tensor + * @param[in] input_ch Channels in input tensor + * @param[in] kernel Pointer to kernel weights + * @param[in] kernel_x Width of kernel + * @param[in] kernel_y Height of kernel + * @param[in] ch_mult Number of channel multiplier + * @param[in] pad_x Padding sizes x + * @param[in] pad_y Padding sizes y + * @param[in] stride_x Convolution stride along the width + * @param[in] stride_y Convolution stride along the height + * @param[in] dilation_x Dilation along width. Not used and intended for future enhancement. + * @param[in] dilation_y Dilation along height. Not used and intended for future enhancement. + * @param[in] bias Pointer to optional bias values. If no bias is + * available, NULL is expected + * @param[in] input_offset Input tensor zero offset + * @param[in] filter_offset Kernel tensor zero offset + * @param[in] output_offset Output tensor zero offset + * @param[in,out] output Pointer to output tensor + * @param[in] output_x Width of output tensor + * @param[in] output_y Height of output tensor + * @param[in] output_activation_min Minimum value to clamp the output to. Range : {0, 255} + * @param[in] output_activation_max Maximum value to clamp the output to.
Range : {0, 255} + * @param[in] out_shift Amount of right-shift for output + * @param[in] out_mult Output multiplier for requantization + * @return The function returns one of the following + * ARM_MATH_SIZE_MISMATCH - Not supported dimension of tensors + * ARM_MATH_SUCCESS - Successful operation + * ARM_MATH_ARGUMENT_ERROR - Implementation not available + * + * Input constraints + * ch_mult is multiple of 2 + * kernel_x is multiple of 2 + * + */ + arm_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input, + const uint16_t input_x, + const uint16_t input_y, + const uint16_t input_ch, + const uint8_t *kernel, + const uint16_t kernel_x, + const uint16_t kernel_y, + const int16_t ch_mult, + const int16_t pad_x, + const int16_t pad_y, + const int16_t stride_x, + const int16_t stride_y, + const int16_t dilation_x, + const int16_t dilation_y, + const int32_t *bias, + const int32_t input_offset, + const int32_t filter_offset, + const int32_t output_offset, + uint8_t *output, + const uint16_t output_x, + const uint16_t output_y, + const int32_t output_activation_min, + const int32_t output_activation_max, + const int32_t out_shift, + const int32_t out_mult); #ifdef __cplusplus } #endif -- cgit