| Field     | Value                                                | Date                      |
|-----------|------------------------------------------------------|---------------------------|
| author    | rihab kouki <rihab.kouki@st.com>                     | 2020-07-28 11:24:49 +0100 |
| committer | rihab kouki <rihab.kouki@st.com>                     | 2020-07-28 11:24:49 +0100 |
| commit    | 96d6da4e252b06dcfdc041e7df23e86161c33007 (patch)     |                           |
| tree      | a262f59bb1db7ec7819acae435f5049cbe5e2354 /NN/Include |                           |
| parent    | 9f95ff5b6ba01db09552b84a0ab79607060a2666 (diff)      |                           |
Diffstat (limited to 'NN/Include')

| Mode       | File                                | Lines changed |
|------------|-------------------------------------|---------------|
| -rw-r--r-- | NN/Include/arm_nn_tables.h          | 3             |
| -rw-r--r-- | NN/Include/arm_nnfunctions.h        | 205           |
| -rw-r--r-- | NN/Include/arm_nnsupportfunctions.h | 95            |

3 files changed, 216 insertions, 87 deletions
```diff
diff --git a/NN/Include/arm_nn_tables.h b/NN/Include/arm_nn_tables.h
index d56d82c..36be5a8 100644
--- a/NN/Include/arm_nn_tables.h
+++ b/NN/Include/arm_nn_tables.h
@@ -53,7 +53,4 @@
 extern const q15_t tanhTable_q15[256];
 extern const q15_t sigmoidHTable_q15[192];
 extern const q15_t sigmoidLTable_q15[128];
-extern const q15_t sigmoidLTable_q15[128];
-extern const q15_t sigmoidHTable_q15[192];
-
 #endif /* ARM_NN_TABLES_H */
diff --git a/NN/Include/arm_nnfunctions.h b/NN/Include/arm_nnfunctions.h
index c6ec83a..331255b 100644
--- a/NN/Include/arm_nnfunctions.h
+++ b/NN/Include/arm_nnfunctions.h
@@ -34,7 +34,7 @@
  * ------------
  *
  * This user manual describes the CMSIS NN software library,
- * a collection of efficient neural network kernels developed to maximize the
+ * a collection of efficient neural network kernels developed to maximize the
  * performance and minimize the memory footprint of neural networks on Cortex-M processor cores.
  *
  * The library is divided into a number of functions each covering a specific category:
@@ -47,8 +47,8 @@
  *
  * The library has separate functions for operating on different weight and activation data
  * types including 8-bit integers (q7_t) and 16-bit integers (q15_t). The descrition of the
- * kernels are included in the function description. The implementation details are also
- * described in this paper [1].
+ * kernels are included in the function description. The implementation details are also
+ * described in this paper [1].
  *
  * Block Diagram
  * --------
@@ -86,7 +86,7 @@
 /**
  * @defgroup groupNN Neural Network Functions
- * These functions perform basic operations for neural network layers.
+ * These functions perform basic operations for neural network layers.
  */

 #ifndef _ARM_NNFUNCTIONS_H
@@ -111,12 +111,12 @@ extern "C"
  *
  * The convolution is implemented in 2 steps: im2col and GEMM
  *
- * im2col is a process of converting each patch of image data into
+ * im2col is a process of converting each patch of image data into
  * a column. After im2col, the convolution is computed as matrix-matrix
  * multiplication.
- *
+ *
  * To reduce the memory footprint, the im2col is performed partially.
- * Each iteration, only a few column (i.e., patches) are generated and
+ * Each iteration, only a few column (i.e., patches) are generated and
  * computed with GEMM kernels similar to CMSIS-DSP arm_mat_mult functions.
  *
  */
@@ -136,9 +136,9 @@
  * @param[in] out_shift amount of right-shift for output
  * @param[in,out] Im_out pointer to output tensor
  * @param[in] dim_im_out output tensor dimension
- * @param[in,out] bufferA pointer to buffer space for input
+ * @param[in,out] bufferA pointer to buffer space for input
  * @param[in,out] bufferB pointer to buffer space for output
- * @return The function returns <code>ARM_MATH_SUCCESS</code>
+ * @return The function returns <code>ARM_MATH_SUCCESS</code>
  *
  */

@@ -153,9 +153,9 @@
 const q7_t * bias,
 const uint16_t bias_shift,
 const uint16_t out_shift,
-q7_t * Im_out,
-const uint16_t dim_im_out,
-q15_t * bufferA,
+q7_t * Im_out,
+const uint16_t dim_im_out,
+q15_t * bufferA,
 q7_t * bufferB);

 /**
@@ -180,7 +180,7 @@
  * @param[in] dim_im_out_y output tensor dimension y
  * @param[in,out] bufferA pointer to buffer space for input
  * @param[in,out] bufferB pointer to buffer space for output
- * @return The function returns <code>ARM_MATH_SUCCESS</code>
+ * @return The function returns <code>ARM_MATH_SUCCESS</code>
  */

 arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t * Im_in,
@@ -219,9 +219,9 @@
  * @param[in] out_shift amount of right-shift for output
  * @param[in,out] Im_out pointer to output tensor
  * @param[in] dim_im_out output tensor dimension
- * @param[in,out] bufferA pointer to buffer space for input
+ * @param[in,out] bufferA pointer to buffer space for input
  * @param[in,out] bufferB pointer to buffer space for output
- * @return The function returns <code>ARM_MATH_SUCCESS</code>
+ * @return The function returns <code>ARM_MATH_SUCCESS</code>
  *
  */

@@ -236,9 +236,9 @@
 const q15_t * bias,
 const uint16_t bias_shift,
 const uint16_t out_shift,
-q15_t * Im_out,
-const uint16_t dim_im_out,
-q15_t * bufferA,
+q15_t * Im_out,
+const uint16_t dim_im_out,
+q15_t * bufferA,
 q7_t * bufferB);

 /**
@@ -256,7 +256,7 @@
  * @param[in] out_shift amount of right-shift for output
  * @param[in,out] Im_out pointer to output tensor
  * @param[in] dim_im_out output tensor dimension
- * @param[in,out] bufferA pointer to buffer space for input
+ * @param[in,out] bufferA pointer to buffer space for input
  * @param[in,out] bufferB pointer to buffer space for output
  * @return The function returns either
  * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
@@ -278,9 +278,9 @@
 const q7_t * bias,
 const uint16_t bias_shift,
 const uint16_t out_shift,
-q7_t * Im_out,
-const uint16_t dim_im_out,
-q15_t * bufferA,
+q7_t * Im_out,
+const uint16_t dim_im_out,
+q15_t * bufferA,
 q7_t * bufferB);

 /**
@@ -303,7 +303,7 @@
  * @param[in,out] Im_out pointer to output tensor
  * @param[in] dim_im_out_x output tensor dimension x
  * @param[in] dim_im_out_y output tensor dimension y
- * @param[in,out] bufferA pointer to buffer space for input
+ * @param[in,out] bufferA pointer to buffer space for input
  * @param[in,out] bufferB pointer to buffer space for output
  * @return The function returns either
  * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
@@ -355,7 +355,7 @@ extern "C"
  * @param[in,out] Im_out pointer to output tensor
  * @param[in] dim_im_out_x output tensor dimension x
  * @param[in] dim_im_out_y output tensor dimension y
- * @param[in,out] bufferA pointer to buffer space for input
+ * @param[in,out] bufferA pointer to buffer space for input
  * @param[in,out] bufferB pointer to buffer space for output
  * @return The function returns either
  * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
@@ -405,7 +405,7 @@
  * @param[in] out_shift amount of right-shift for output
  * @param[in,out] Im_out pointer to output tensor
  * @param[in] dim_im_out output tensor dimension
- * @param[in,out] bufferA pointer to buffer space for input
+ * @param[in,out] bufferA pointer to buffer space for input
  * @param[in,out] bufferB pointer to buffer space for output
  * @return The function returns either
  * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
@@ -426,9 +426,9 @@
 const q7_t * bias,
 const uint16_t bias_shift,
 const uint16_t out_shift,
-q7_t * Im_out,
-const uint16_t dim_im_out,
-q15_t * bufferA,
+q7_t * Im_out,
+const uint16_t dim_im_out,
+q15_t * bufferA,
 q7_t * bufferB);

 /**
@@ -446,7 +446,7 @@
  * @param[in] out_shift amount of right-shift for output
  * @param[in,out] Im_out pointer to output tensor
  * @param[in] dim_im_out output tensor dimension
- * @param[in,out] bufferA pointer to buffer space for input
+ * @param[in,out] bufferA pointer to buffer space for input
  * @param[in,out] bufferB pointer to buffer space for output
  * @return The function returns either
  * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
@@ -468,9 +468,9 @@
 const q15_t * bias,
 const uint16_t bias_shift,
 const uint16_t out_shift,
-q15_t * Im_out,
-const uint16_t dim_im_out,
-q15_t * bufferA,
+q15_t * Im_out,
+const uint16_t dim_im_out,
+q15_t * bufferA,
 q7_t * bufferB);

 /**
@@ -493,7 +493,7 @@
  * @param[in,out] Im_out pointer to output tensor
  * @param[in] dim_im_out_x output tensor dimension x
  * @param[in] dim_im_out_y output tensor dimension y
- * @param[in,out] bufferA pointer to buffer space for input
+ * @param[in,out] bufferA pointer to buffer space for input
  * @param[in,out] bufferB pointer to buffer space for output
  * @return The function returns either
  * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
@@ -508,7 +508,7 @@ extern "C"
  *
  * <b>Input dimension constraints:</b>
  *
- * ch_im_in is multiple of 2
+ * ch_im_in is multiple of 2
  *
  * ch_im_out is multipe of 2
  *
@@ -532,10 +532,10 @@
 const uint16_t out_shift,
 q15_t * Im_out,
 const uint16_t dim_im_out_x,
-const uint16_t dim_im_out_y,
-q15_t * bufferA,
+const uint16_t dim_im_out_y,
+q15_t * bufferA,
 q7_t * bufferB);
-
+
 /**
  * @brief Q7 depthwise separable convolution function
  * @param[in] Im_in pointer to input tensor
@@ -551,7 +551,7 @@
  * @param[in] out_shift amount of right-shift for output
  * @param[in,out] Im_out pointer to output tensor
  * @param[in] dim_im_out output tensor dimension
- * @param[in,out] bufferA pointer to buffer space for input
+ * @param[in,out] bufferA pointer to buffer space for input
  * @param[in,out] bufferB pointer to buffer space for output
  * @return The function returns either
  * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
@@ -574,8 +574,8 @@
 const uint16_t bias_shift,
 const uint16_t out_shift,
 q7_t * Im_out,
-const uint16_t dim_im_out,
-q15_t * bufferA,
+const uint16_t dim_im_out,
+q15_t * bufferA,
 q7_t * bufferB);

 /**
@@ -598,7 +598,7 @@
  * @param[in,out] Im_out pointer to output tensor
  * @param[in] dim_im_out_x output tensor dimension x
  * @param[in] dim_im_out_y output tensor dimension y
- * @param[in,out] bufferA pointer to buffer space for input
+ * @param[in,out] bufferA pointer to buffer space for input
  * @param[in,out] bufferB pointer to buffer space for output
  * @return The function returns either
  * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
@@ -642,7 +642,7 @@
  *
  * Here we have two types of kernel functions. The basic function
  * implements the function using regular GEMV approach. The opt functions
- * operates with weights in interleaved formats.
+ * operates with weights in interleaved formats.
  *
  */

@@ -666,9 +666,9 @@
 const uint16_t dim_vec,
 const uint16_t num_of_rows,
 const uint16_t bias_shift,
-const uint16_t out_shift,
-const q7_t * bias,
-q7_t * pOut,
+const uint16_t out_shift,
+const q7_t * bias,
+q7_t * pOut,
 q15_t * vec_buffer);

 /**
@@ -691,9 +691,9 @@
 const uint16_t dim_vec,
 const uint16_t num_of_rows,
 const uint16_t bias_shift,
-const uint16_t out_shift,
-const q7_t * bias,
-q7_t * pOut,
+const uint16_t out_shift,
+const q7_t * bias,
+q7_t * pOut,
 q15_t * vec_buffer);

 /**
@@ -716,9 +716,9 @@
 const uint16_t dim_vec,
 const uint16_t num_of_rows,
 const uint16_t bias_shift,
-const uint16_t out_shift,
-const q15_t * bias,
-q15_t * pOut,
+const uint16_t out_shift,
+const q15_t * bias,
+q15_t * pOut,
 q15_t * vec_buffer);

 /**
@@ -742,8 +742,8 @@
 const uint16_t num_of_rows,
 const uint16_t bias_shift,
 const uint16_t out_shift,
-const q15_t * bias,
-q15_t * pOut,
+const q15_t * bias,
+q15_t * pOut,
 q15_t * vec_buffer);

 /**
@@ -767,8 +767,8 @@
 const uint16_t num_of_rows,
 const uint16_t bias_shift,
 const uint16_t out_shift,
-const q7_t * bias,
-q15_t * pOut,
+const q7_t * bias,
+q15_t * pOut,
 q15_t * vec_buffer);

 /**
@@ -792,16 +792,16 @@
 const uint16_t num_of_rows,
 const uint16_t bias_shift,
 const uint16_t out_shift,
-const q7_t * bias,
-q15_t * pOut,
+const q7_t * bias,
+q15_t * pOut,
 q15_t * vec_buffer);

 /**
  * @brief Matrix-Multiplication Kernels for Convolution
  *
- * These functions are used within convolution layer functions for
+ * These functions are used within convolution layer functions for
  * matrix multiplication.
- *
+ *
  * The implementation is similar to CMSIS-DSP arm_mat_mult functions
  * with one Q7 and one Q15 operands. The Q15 operand is the im2col
  * output which is always with 2 columns.
@@ -826,8 +826,8 @@
 const uint16_t ch_im_out,
 const uint16_t numCol_A,
 const uint16_t bias_shift,
-const uint16_t out_shift,
-const q7_t * bias,
+const uint16_t out_shift,
+const q7_t * bias,
 q7_t * pOut);

 /**
@@ -848,8 +848,8 @@
 const uint16_t ch_im_out,
 const uint16_t numCol_A,
 const uint16_t bias_shift,
-const uint16_t out_shift,
-const q7_t * bias,
+const uint16_t out_shift,
+const q7_t * bias,
 q7_t * pOut);

 #ifdef __cplusplus
@@ -902,7 +902,7 @@
  * @return none.
  */

-void arm_nn_activations_direct_q7(q7_t * data, uint16_t size, uint16_t int_width,
+void arm_nn_activations_direct_q7(q7_t * data, uint16_t size, uint16_t int_width,
 arm_nn_activation_type type);

 /**
@@ -944,9 +944,9 @@
 const uint16_t ch_im_in,
 const uint16_t dim_kernel,
 const uint16_t padding,
-const uint16_t stride,
-const uint16_t dim_im_out,
-q7_t * bufferA,
+const uint16_t stride,
+const uint16_t dim_im_out,
+q7_t * bufferA,
 q7_t * Im_out);

 /**
@@ -969,9 +969,9 @@
 const uint16_t ch_im_in,
 const uint16_t dim_kernel,
 const uint16_t padding,
-const uint16_t stride,
-const uint16_t dim_im_out,
-q7_t * bufferA,
+const uint16_t stride,
+const uint16_t dim_im_out,
+q7_t * bufferA,
 q7_t * Im_out);

 /**
@@ -1003,6 +1003,71 @@
 void arm_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out);

+    /**
+     * @brief uint8 depthwise convolution function with asymmetric quantization for even number of channel multiplier
+     *        and input channels. Unless specified otherwise, arguments are mandatory.
+     *
+     * @param[in]     input       Pointer to input tensor
+     * @param[in]     input_x     Width of input tensor
+     * @param[in]     input_y     Height of input tensor
+     * @param[in]     input_ch    Channels in input tensor
+     * @param[in]     kernel      Pointer to kernel weights
+     * @param[in]     kernel_x    Width of kernel
+     * @param[in]     kernel_y    Height of kernel
+     * @param[in]     ch_mult     Number of channel multiplier
+     * @param[in]     pad_x       Padding sizes x
+     * @param[in]     pad_y       Padding sizes y
+     * @param[in]     stride_x    Convolution stride along the width
+     * @param[in]     stride_y    Convolution stride along the height
+     * @param[in]     dilation_x  Dilation along width. Not used and intended for future enhancement.
+     * @param[in]     dilation_y  Dilation along height. Not used and intended for future enhancement.
+     * @param[in]     bias        Pointer to optional bias values. If no bias is
+     *                            availble, NULL is expected
+     * @param[in]     input_offset   Input tensor zero offset
+     * @param[in]     filter_offset  Kernel tensor zero offset
+     * @param[in]     output_offset  Output tensor zero offset
+     * @param[in,out] output      Pointer to output tensor
+     * @param[in]     output_x    Width of output tensor
+     * @param[in]     output_y    Height of output tensor
+     * @param[in]     output_activation_min  Minimum value to clamp the output to. Range : {0, 255}
+     * @param[in]     output_activation_max  Minimum value to clamp the output to. Range : {0, 255}
+     * @param[in]     out_shift   Amount of right-shift for output
+     * @param[in]     out_mult    Output multiplier for requantization
+     * @return The function returns one of the following
+     *         <code>ARM_MATH_SIZE_MISMATCH</code> - Not supported dimension of tensors
+     *         <code>ARM_MATH_SUCCESS</code> - Successful operation
+     *         <code>ARM_MATH_ARGUMENT_ERROR</code> - Implementation not available
+     *
+     * <b> Input constraints</b>
+     * ch_mult  is multiple of 2
+     * kernel_x is multiple of 2
+     *
+     */
+    arm_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input,
+                                                const uint16_t input_x,
+                                                const uint16_t input_y,
+                                                const uint16_t input_ch,
+                                                const uint8_t *kernel,
+                                                const uint16_t kernel_x,
+                                                const uint16_t kernel_y,
+                                                const int16_t ch_mult,
+                                                const int16_t pad_x,
+                                                const int16_t pad_y,
+                                                const int16_t stride_x,
+                                                const int16_t stride_y,
+                                                const int16_t dilation_x,
+                                                const int16_t dilation_y,
+                                                const int32_t *bias,
+                                                const int32_t input_offset,
+                                                const int32_t filter_offset,
+                                                const int32_t output_offset,
+                                                uint8_t *output,
+                                                const uint16_t output_x,
+                                                const uint16_t output_y,
+                                                const int32_t output_activation_min,
+                                                const int32_t output_activation_max,
+                                                const int32_t out_shift,
+                                                const int32_t out_mult);

 #ifdef __cplusplus
 }
 #endif
diff --git a/NN/Include/arm_nnsupportfunctions.h b/NN/Include/arm_nnsupportfunctions.h
index 8460190..af426e1 100644
--- a/NN/Include/arm_nnsupportfunctions.h
+++ b/NN/Include/arm_nnsupportfunctions.h
@@ -32,13 +32,17 @@
 #include "arm_math.h"
 #include "arm_common_tables.h"
-//#include <cstring>

 #ifdef __cplusplus
 extern    "C"
 {
 #endif

+#define LEFT_SHIFT(_shift) (_shift > 0 ? _shift : 0)
+#define RIGHT_SHIFT(_shift) (_shift > 0 ? 0 : -_shift)
+#define Q31_MIN (0x80000000L)
+#define Q31_MAX (0x7FFFFFFFL)
+
 /**
  * @brief Union for SIMD access of Q31/Q15/Q7 types
  */
@@ -72,11 +76,11 @@ typedef enum
  */

 /**
- * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift
- * @param[in] *pSrc points to the Q7 input vector
- * @param[out] *pDst points to the Q15 output vector
- * @param[in] blockSize length of the input vector
- * @return none.
+ * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift
+ * @param[in] *pSrc points to the Q7 input vector
+ * @param[out] *pDst points to the Q15 output vector
+ * @param[in] blockSize length of the input vector
+ * @return none.
  *
  */

@@ -84,10 +88,10 @@ void arm_q7_to_q15_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t block
 /**
  * @brief Converts the elements of the Q7 vector to reordered Q15 vector without left-shift
- * @param[in] *pSrc points to the Q7 input vector
- * @param[out] *pDst points to the Q15 output vector
- * @param[in] blockSize length of the input vector
- * @return none.
+ * @param[in] *pSrc points to the Q7 input vector
+ * @param[out] *pDst points to the Q15 output vector
+ * @param[in] blockSize length of the input vector
+ * @return none.
  *
  */

@@ -163,7 +167,7 @@ void arm_nn_mult_q15(
 q15_t * pDst,
 const uint16_t out_shift,
 uint32_t blockSize);
-
+
 /**
  * @brief Q7 vector multiplication with variable output shifts
  * @param[in] *pSrcA pointer to the first input vector
@@ -185,16 +189,79 @@ void arm_nn_mult_q7(
 q7_t * pDst,
 const uint16_t out_shift,
 uint32_t blockSize);
-
+
 /**
- * @brief defition to adding rouding offset
+ * @brief macro for adding rounding offset
  */
 #ifndef ARM_NN_TRUNCATE
-    #define NN_ROUND(out_shift) ( 0x1 << (out_shift - 1) )
+    #define NN_ROUND(out_shift) ( (0x1u << out_shift) >> 1 )
 #else
     #define NN_ROUND(out_shift) 0
 #endif

+/**
+ * @brief           Saturating doubling high multiply. Result matches
+ *                  NEON instruction VQRDMULH.
+ * @param[in]       m1        Multiplicand
+ * @param[in]       m2        Multiplier
+ * @return          Result of multiplication.
+ *
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_sat_doubling_high_mult(const q31_t m1, const q31_t m2)
+{
+    q31_t result = 0;
+    // Rounding offset to add for a right shift of 31
+    q63_t mult = 1 << 30;
+
+    if ((m1 < 0) ^ (m2 < 0))
+    {
+        mult = 1 - mult;
+    }
+    // Gets resolved as a SMLAL instruction
+    mult = mult + (q63_t)m1 * m2;
+
+    // Utilize all of the upper 32 bits. This is the doubling step
+    // as well.
+    result = mult / (1UL << 31);
+
+    if ((m1 == m2) && (m1 == Q31_MIN))
+    {
+        result = Q31_MAX;
+    }
+    return result;
+}
+
+/**
+ * @brief           Rounding divide by power of two.
+ * @param[in]       dividend - Dividend
+ * @param[in]       exponent - Divisor = power(2, exponent)
+ *                             Range: [0, 31]
+ * @return          Rounded result of division. Midpoint is rounded away from zero.
+ *
+ */
+__STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
+{
+    q31_t result = 0;
+    const q31_t remainder_mask = (1l << exponent) - 1;
+    int32_t remainder = remainder_mask & dividend;
+
+    // Basic division
+    result = dividend >> exponent;
+
+    // Adjust 'result' for rounding (mid point away from zero)
+    q31_t threshold = remainder_mask >> 1;
+    if (result < 0)
+    {
+        threshold++;
+    }
+    if (remainder > threshold)
+    {
+        result++;
+    }
+
+    return result;
+}
+
 #ifdef __cplusplus
 }
 #endif
```
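Editor's note on the convolution kernels documented above: the headers describe convolution as a partial im2col followed by a GEMM that always consumes two im2col columns at a time, so only a small column buffer (bufferA) is ever live. The sketch below is an illustration added for this write-up only (plain C, hypothetical helper names, not code from this commit) of how such a partial im2col plus 2-column matrix multiplication fits together.

```c
/*
 * Editor's illustration (not part of the commit): partial im2col + GEMM,
 * the scheme described for the arm_convolve_HWC_* kernels and the 2-column
 * arm_nn_mat_mult kernels. Helper names are hypothetical.
 */
#include <stdint.h>

/* Expand one kernel-sized patch whose top-left corner is (x, y) into a
 * column of the scratch buffer, applying zero padding outside the image. */
static void im2col_patch_q7(const int8_t *im, int dim_im, int ch_im,
                            int dim_kernel, int x, int y, int16_t *col)
{
    for (int ky = 0; ky < dim_kernel; ky++) {
        for (int kx = 0; kx < dim_kernel; kx++) {
            for (int c = 0; c < ch_im; c++) {
                int in_x = x + kx;
                int in_y = y + ky;
                int16_t v = 0; /* zero padding */
                if (in_x >= 0 && in_x < dim_im && in_y >= 0 && in_y < dim_im) {
                    v = im[(in_y * dim_im + in_x) * ch_im + c];
                }
                *col++ = v;
            }
        }
    }
}

/* GEMM step: multiply the weight matrix against two im2col columns at once,
 * which is why the Q15 operand of the mat-mult kernels always has 2 columns. */
static void mat_mult_two_columns(const int8_t *weights, const int16_t *cols,
                                 int ch_im_out, int col_len, int32_t *out)
{
    for (int r = 0; r < ch_im_out; r++) {
        int32_t sum0 = 0;
        int32_t sum1 = 0;
        for (int k = 0; k < col_len; k++) {
            sum0 += weights[r * col_len + k] * cols[k];
            sum1 += weights[r * col_len + k] * cols[col_len + k];
        }
        out[r] = sum0;              /* first output pixel, channel r  */
        out[ch_im_out + r] = sum1;  /* second output pixel, channel r */
    }
}
```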
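The fully-connected documentation distinguishes a basic kernel (a plain GEMV over weights in their natural order) from opt kernels that expect interleaved weights. The following editor-added sketch shows what the basic q7 path amounts to; the real kernels add DSP/SIMD code paths and use the library's NN_ROUND and __SSAT for rounding and saturation.

```c
#include <stdint.h>

/* Editor's sketch of the "basic" q7 fully-connected kernel: one dot product
 * per output row, bias applied with a left shift, result rounded, right
 * shifted and saturated to the q7 range. Weights are read in row-major
 * order; the "opt" variants expect an interleaved layout instead. */
static void fully_connected_q7_sketch(const int8_t *vec, const int8_t *weights,
                                      int dim_vec, int num_of_rows,
                                      int bias_shift, int out_shift,
                                      const int8_t *bias, int8_t *out)
{
    for (int r = 0; r < num_of_rows; r++) {
        int32_t acc = (int32_t)bias[r] << bias_shift;
        for (int k = 0; k < dim_vec; k++) {
            acc += (int32_t)vec[k] * (int32_t)weights[r * dim_vec + k];
        }
        acc += (1 << out_shift) >> 1;   /* rounding offset, cf. NN_ROUND */
        acc >>= out_shift;
        if (acc > 127)  { acc = 127;  } /* saturate to q7 */
        if (acc < -128) { acc = -128; }
        out[r] = (int8_t)acc;
    }
}
```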
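The main functional addition to arm_nnfunctions.h is the arm_depthwise_conv_u8_basic_ver1 declaration. A call can be wired up as below (editor-added usage sketch: the dimensions respect the documented constraints that ch_mult and kernel_x are multiples of 2, the HWC buffer sizes are an assumption, and the offsets, multiplier and shift are placeholders that would normally come from the model's quantization parameters).

```c
#include "arm_nnfunctions.h"   /* assumes the CMSIS-NN include path */

#define IN_X    4
#define IN_Y    4
#define IN_CH   2
#define CH_MULT 2              /* documented constraint: multiple of 2 */
#define KER_X   2              /* documented constraint: multiple of 2 */
#define KER_Y   2
#define OUT_X   3              /* (IN_X - KER_X + 2*pad_x)/stride_x + 1 */
#define OUT_Y   3
#define OUT_CH  (IN_CH * CH_MULT)

/* Placeholder, zero-initialized buffers; an HWC layout is assumed here. */
static const uint8_t input[IN_Y * IN_X * IN_CH];
static const uint8_t kernel[KER_Y * KER_X * OUT_CH];
static const int32_t bias[OUT_CH];
static uint8_t output[OUT_Y * OUT_X * OUT_CH];

arm_status run_depthwise_u8_example(void)
{
    /* Offsets, out_shift and out_mult are placeholder values only. */
    return arm_depthwise_conv_u8_basic_ver1(input, IN_X, IN_Y, IN_CH,
                                            kernel, KER_X, KER_Y, CH_MULT,
                                            0, 0,          /* pad_x, pad_y       */
                                            1, 1,          /* stride_x, stride_y */
                                            1, 1,          /* dilation (unused)  */
                                            bias,
                                            128, 128, 128, /* input/filter/output offsets */
                                            output, OUT_X, OUT_Y,
                                            0, 255,        /* activation min/max */
                                            1,             /* out_shift */
                                            (1 << 30));    /* out_mult  */
}
```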
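In arm_nnsupportfunctions.h the NN_ROUND macro changes from `( 0x1 << (out_shift - 1) )` to `( (0x1u << out_shift) >> 1 )`. Both forms give the usual round-to-nearest offset for positive shifts, but the old form shifts by -1 when out_shift is 0, which is undefined behaviour in C; the new form simply yields a zero offset in that case. A small editor-added check:

```c
#include <stdint.h>
#include <stdio.h>

/* New definition from this commit (non-ARM_NN_TRUNCATE build). */
#define NN_ROUND(out_shift) ( (0x1u << out_shift) >> 1 )

int main(void)
{
    /* out_shift == 0: offset is 0, so the value passes through unchanged. */
    printf("offset(0) = %u\n", NN_ROUND(0));                  /* prints 0 */

    /* out_shift == 3: offset is 4, giving round-to-nearest on the shift. */
    int32_t acc = 21;
    int32_t rounded = (acc + (int32_t)NN_ROUND(3)) >> 3;
    printf("21 >> 3 with rounding = %ld\n", (long)rounded);   /* prints 3 */
    return 0;
}
```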
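The two new support functions are the building blocks of gemmlowp/TFLite-style requantization: a saturating rounding doubling high multiply followed by a rounding divide by a power of two, with the new LEFT_SHIFT and RIGHT_SHIFT macros splitting a signed shift into its two halves. The sketch below is an editor-added illustration of how they can compose into the out_mult/out_shift requantization implied by the new u8 depthwise kernel; the exact shift-sign convention used inside that kernel is not visible in this header and should be checked against its implementation.

```c
#include "arm_nnsupportfunctions.h"   /* assumes the CMSIS-NN include path */

/* Editor's sketch: scale a 32-bit accumulator by out_mult and 2^out_shift,
 * add the output zero-point offset, clamp to the activation range and
 * narrow to uint8. */
static inline uint8_t requantize_u8_sketch(q31_t acc,
                                           q31_t out_mult,
                                           int32_t out_shift,
                                           int32_t output_offset,
                                           int32_t act_min,
                                           int32_t act_max)
{
    /* Positive part of the shift is applied before the fixed-point multiply,
     * the negative part afterwards as a rounding divide (cf. LEFT_SHIFT and
     * RIGHT_SHIFT introduced in this commit). */
    q31_t val = arm_nn_sat_doubling_high_mult(acc * (1 << LEFT_SHIFT(out_shift)),
                                              out_mult);
    val = arm_nn_divide_by_power_of_two(val, RIGHT_SHIFT(out_shift));

    val += output_offset;
    if (val < act_min) { val = act_min; }
    if (val > act_max) { val = act_max; }
    return (uint8_t)val;
}
```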