From 9f95ff5b6ba01db09552b84a0ab79607060a2666 Mon Sep 17 00:00:00 2001 From: Ali Labbene Date: Wed, 11 Dec 2019 08:59:21 +0100 Subject: Official ARM version: v5.4.0 Add CMSIS V5.4.0, please refer to index.html available under \docs folder. Note: content of \CMSIS\Core\Include has been copied under \Include to keep the same structure used in existing projects, and thus avoid projects mass update Note: the following components have been removed from ARM original delivery (as not used in ST packages) - CMSIS_EW2018.pdf - .gitattributes - .gitignore - \Device - \CMSIS - \CoreValidation - \DAP - \Documentation - \DoxyGen - \Driver - \Pack - \RTOS\CMSIS_RTOS_Tutorial.pdf - \RTOS\RTX - \RTOS\Template - \RTOS2\RTX - \Utilities - All ARM/GCC projects files are deleted from \DSP, \RTOS and \RTOS2 Change-Id: Ia026c3f0f0d016627a4fb5a9032852c33d24b4d3 --- docs/NN/html/group__FC.html | 755 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 755 insertions(+) create mode 100644 docs/NN/html/group__FC.html (limited to 'docs/NN/html/group__FC.html') diff --git a/docs/NN/html/group__FC.html b/docs/NN/html/group__FC.html new file mode 100644 index 0000000..659cf6d --- /dev/null +++ b/docs/NN/html/group__FC.html @@ -0,0 +1,755 @@ + + + + + +Fully-connected Layer Functions +CMSIS-NN: Fully-connected Layer Functions + + + + + + + + + + + + + + +
+
+ + + + + + + +
+
CMSIS-NN +  Version 1.1.0 +
+
CMSIS NN Software Library
+
+
+ +
+
    + +
+
+ + + +
+
+ +
+
+
+ +
+ + + + +
+ +
+ +
+ +
+
Fully-connected Layer Functions
+
+
+ + + + + + + + + + + + + + + + + + + + +

+Functions

arm_status arm_fully_connected_mat_q7_vec_q15 (const q15_t *pV, const q7_t *pM, const uint16_t dim_vec, const uint16_t num_of_rows, const uint16_t bias_shift, const uint16_t out_shift, const q7_t *bias, q15_t *pOut, q15_t *vec_buffer)
 Mixed Q15-Q7 fully-connected layer function. More...
 
arm_status arm_fully_connected_mat_q7_vec_q15_opt (const q15_t *pV, const q7_t *pM, const uint16_t dim_vec, const uint16_t num_of_rows, const uint16_t bias_shift, const uint16_t out_shift, const q7_t *bias, q15_t *pOut, q15_t *vec_buffer)
 Mixed Q15-Q7 opt fully-connected layer function. More...
 
arm_status arm_fully_connected_q15 (const q15_t *pV, const q15_t *pM, const uint16_t dim_vec, const uint16_t num_of_rows, const uint16_t bias_shift, const uint16_t out_shift, const q15_t *bias, q15_t *pOut, q15_t *vec_buffer)
 Q15 basic fully-connected layer function. More...
 
arm_status arm_fully_connected_q15_opt (const q15_t *pV, const q15_t *pM, const uint16_t dim_vec, const uint16_t num_of_rows, const uint16_t bias_shift, const uint16_t out_shift, const q15_t *bias, q15_t *pOut, q15_t *vec_buffer)
 Q15 opt fully-connected layer function. More...
 
arm_status arm_fully_connected_q7 (const q7_t *pV, const q7_t *pM, const uint16_t dim_vec, const uint16_t num_of_rows, const uint16_t bias_shift, const uint16_t out_shift, const q7_t *bias, q7_t *pOut, q15_t *vec_buffer)
 Q7 basic fully-connected layer function. More...
 
arm_status arm_fully_connected_q7_opt (const q7_t *pV, const q7_t *pM, const uint16_t dim_vec, const uint16_t num_of_rows, const uint16_t bias_shift, const uint16_t out_shift, const q7_t *bias, q7_t *pOut, q15_t *vec_buffer)
 Q7 opt fully-connected layer function. More...
 
+

Description

+

Perform fully-connected layer

+

A fully-connected layer is basically a matrix-vector multiplication with bias. The matrix holds the weights, and the input/output vectors are the activation values. Supported {weight, activation} precisions include {8-bit, 8-bit}, {16-bit, 16-bit}, and {8-bit, 16-bit}.

+

Here we have two types of kernel functions. The basic functions implement the layer using a regular GEMV approach. The opt functions operate on weights stored in interleaved formats.

+

Function Documentation

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_fully_connected_mat_q7_vec_q15 (const q15_t * pV,
const q7_t * pM,
const uint16_t dim_vec,
const uint16_t num_of_rows,
const uint16_t bias_shift,
const uint16_t out_shift,
const q7_t * bias,
q15_t * pOut,
q15_t * vec_buffer 
)
+
+
Parameters
+ + + + + + + + + + +
[in] pV: pointer to input vector
[in] pM: pointer to matrix weights
[in] dim_vec: length of the vector
[in] num_of_rows: number of rows in weight matrix
[in] bias_shift: amount of left-shift for bias
[in] out_shift: amount of right-shift for output
[in] bias: pointer to bias
[in,out] pOut: pointer to output vector
[in,out] vec_buffer: pointer to buffer space for input
+
+
+
Returns
The function returns ARM_MATH_SUCCESS
+

Buffer size:

+

vec_buffer size: 0

+

Q7_Q15 version of the fully connected layer

+

Weights are in q7_t and Activations are in q15_t

+ +

References NN_ROUND.

+ +

Referenced by gru_example().

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_fully_connected_mat_q7_vec_q15_opt (const q15_t * pV,
const q7_t * pM,
const uint16_t dim_vec,
const uint16_t num_of_rows,
const uint16_t bias_shift,
const uint16_t out_shift,
const q7_t * bias,
q15_t * pOut,
q15_t * vec_buffer 
)
+
+
Parameters
+ + + + + + + + + + +
[in] pV: pointer to input vector
[in] pM: pointer to matrix weights
[in] dim_vec: length of the vector
[in] num_of_rows: number of rows in weight matrix
[in] bias_shift: amount of left-shift for bias
[in] out_shift: amount of right-shift for output
[in] bias: pointer to bias
[in,out] pOut: pointer to output vector
[in,out] vec_buffer: pointer to buffer space for input
+
+
+
Returns
The function returns ARM_MATH_SUCCESS
+

Buffer size:

+

vec_buffer size: 0

+

Q7_Q15 version of the fully connected layer

+

Weights are in q7_t and Activations are in q15_t

+

Limitation: x4 version requires weight reordering to work

+

Here we use only one pointer to read 4 rows in the weight matrix. So if the original q7_t matrix looks like this:

+

| a11 | a12 | a13 | a14 | a15 | a16 | a17 |

+

| a21 | a22 | a23 | a24 | a25 | a26 | a27 |

+

| a31 | a32 | a33 | a34 | a35 | a36 | a37 |

+

| a41 | a42 | a43 | a44 | a45 | a46 | a47 |

+

| a51 | a52 | a53 | a54 | a55 | a56 | a57 |

+

| a61 | a62 | a63 | a64 | a65 | a66 | a67 |

+

We operate on multiple-of-4 rows, so the first four rows become

+

| a11 | a21 | a12 | a22 | a31 | a41 | a32 | a42 |

+

| a13 | a23 | a14 | a24 | a33 | a43 | a34 | a44 |

+

| a15 | a25 | a16 | a26 | a35 | a45 | a36 | a46 |

+

The left-over column will be in order, which is: | a17 | a27 | a37 | a47 |

+

For the left-over rows, we do 1x1 computation, so the data remains in its original order.

+

So the stored weight matrix looks like this:

+

| a11 | a21 | a12 | a22 | a31 | a41 |

+

| a32 | a42 | a13 | a23 | a14 | a24 |

+

| a33 | a43 | a34 | a44 | a15 | a25 |

+

| a16 | a26 | a35 | a45 | a36 | a46 |

+

| a17 | a27 | a37 | a47 | a51 | a52 |

+

| a53 | a54 | a55 | a56 | a57 | a61 |

+

| a62 | a63 | a64 | a65 | a66 | a67 |

+ +

References NN_ROUND.

+ +

Referenced by gru_example().

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_fully_connected_q15 (const q15_t * pV,
const q15_t * pM,
const uint16_t dim_vec,
const uint16_t num_of_rows,
const uint16_t bias_shift,
const uint16_t out_shift,
const q15_t * bias,
q15_t * pOut,
q15_t * vec_buffer 
)
+
+

Q15 basic fully-connected layer function.

+
Parameters
+ + + + + + + + + + +
[in] pV: pointer to input vector
[in] pM: pointer to matrix weights
[in] dim_vec: length of the vector
[in] num_of_rows: number of rows in weight matrix
[in] bias_shift: amount of left-shift for bias
[in] out_shift: amount of right-shift for output
[in] bias: pointer to bias
[in,out] pOut: pointer to output vector
[in,out] vec_buffer: pointer to buffer space for input
+
+
+
Returns
The function returns ARM_MATH_SUCCESS
+

Buffer size:

+

vec_buffer size: 0

+ +

References NN_ROUND.

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_fully_connected_q15_opt (const q15_t * pV,
const q15_t * pM,
const uint16_t dim_vec,
const uint16_t num_of_rows,
const uint16_t bias_shift,
const uint16_t out_shift,
const q15_t * bias,
q15_t * pOut,
q15_t * vec_buffer 
)
+
+
Parameters
+ + + + + + + + + + +
[in] pV: pointer to input vector
[in] pM: pointer to matrix weights
[in] dim_vec: length of the vector
[in] num_of_rows: number of rows in weight matrix
[in] bias_shift: amount of left-shift for bias
[in] out_shift: amount of right-shift for output
[in] bias: pointer to bias
[in,out] pOut: pointer to output vector
[in,out] vec_buffer: pointer to buffer space for input
+
+
+
Returns
The function returns ARM_MATH_SUCCESS
+

Buffer size:

+

vec_buffer size: 0

+

Here we use only one pointer to read 4 rows in the weight matrix. So if the original matrix looks like this:

+

| a11 | a12 | a13 |

+

| a21 | a22 | a23 |

+

| a31 | a32 | a33 |

+

| a41 | a42 | a43 |

+

| a51 | a52 | a53 |

+

| a61 | a62 | a63 |

+

We operate on multiple-of-4 rows, so the first four rows become

+

| a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 |

+

| a13 | a23 | a33 | a43 |

+

The remaining rows are kept in their original order.

+

So the stored weight matrix looks like this:

+

| a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 |

+

| a13 | a23 | a33 | a43 | a51 | a52 | a53 | a61 |

+

| a62 | a63 |

+ +

References NN_ROUND.

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_fully_connected_q7 (const q7_t * pV,
const q7_t * pM,
const uint16_t dim_vec,
const uint16_t num_of_rows,
const uint16_t bias_shift,
const uint16_t out_shift,
const q7_t * bias,
q7_t * pOut,
q15_t * vec_buffer 
)
+
+
Parameters
+ + + + + + + + + + +
[in] pV: pointer to input vector
[in] pM: pointer to matrix weights
[in] dim_vec: length of the vector
[in] num_of_rows: number of rows in weight matrix
[in] bias_shift: amount of left-shift for bias
[in] out_shift: amount of right-shift for output
[in] bias: pointer to bias
[in,out] pOut: pointer to output vector
[in,out] vec_buffer: pointer to buffer space for input
+
+
+
Returns
The function returns ARM_MATH_SUCCESS
+

Buffer size:

+

vec_buffer size: dim_vec

+

This basic function is designed to work with a regular weight matrix without interleaving.

+ +

References arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.

+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
arm_status arm_fully_connected_q7_opt (const q7_t * pV,
const q7_t * pM,
const uint16_t dim_vec,
const uint16_t num_of_rows,
const uint16_t bias_shift,
const uint16_t out_shift,
const q7_t * bias,
q7_t * pOut,
q15_t * vec_buffer 
)
+
+
Parameters
+ + + + + + + + + + +
[in] pV: pointer to input vector
[in] pM: pointer to matrix weights
[in] dim_vec: length of the vector
[in] num_of_rows: number of rows in weight matrix
[in] bias_shift: amount of left-shift for bias
[in] out_shift: amount of right-shift for output
[in] bias: pointer to bias
[in,out] pOut: pointer to output vector
[in,out] vec_buffer: pointer to buffer space for input
+
+
+
Returns
The function returns ARM_MATH_SUCCESS
+

Buffer size:

+

vec_buffer size: dim_vec

+

This opt function is designed to work with an interleaved weight matrix. The vector input is assumed to be in q7_t format; we call the arm_q7_to_q15_reordered_no_shift function to expand it into q15_t format with certain weight re-ordering, refer to the function comments for more details. Here we use only one pointer to read 4 rows in the weight matrix. So if the original q7_t matrix looks like this:

+

| a11 | a12 | a13 | a14 | a15 | a16 | a17 |

+

| a21 | a22 | a23 | a24 | a25 | a26 | a27 |

+

| a31 | a32 | a33 | a34 | a35 | a36 | a37 |

+

| a41 | a42 | a43 | a44 | a45 | a46 | a47 |

+

| a51 | a52 | a53 | a54 | a55 | a56 | a57 |

+

| a61 | a62 | a63 | a64 | a65 | a66 | a67 |

+

We operate on multiple-of-4 rows, so the first four rows become

+

| a11 | a21 | a13 | a23 | a31 | a41 | a33 | a43 |

+

| a12 | a22 | a14 | a24 | a32 | a42 | a34 | a44 |

+

| a15 | a25 | a35 | a45 | a16 | a26 | a36 | a46 |

+

So within the kernel, we first read the re-ordered vector in as:

+

| b1 | b3 | and | b2 | b4 |

+

the four q31_t weights will look like

+

| a11 | a13 |, | a21 | a23 |, | a31 | a33 |, | a41 | a43 |

+

| a12 | a14 |, | a22 | a24 |, | a32 | a34 |, | a42 | a44 |

+

The left-over column will be in order, which is:

+

| a17 | a27 | a37 | a47 |

+

For the left-over rows, we do 1x1 computation, so the data remains in its original order.

+

So the stored weight matrix looks like this:

+

| a11 | a21 | a13 | a23 | a31 | a41 |

+

| a33 | a43 | a12 | a22 | a14 | a24 |

+

| a32 | a42 | a34 | a44 | a15 | a25 |

+

| a35 | a45 | a16 | a26 | a36 | a46 |

+

| a17 | a27 | a37 | a47 | a51 | a52 |

+

| a53 | a54 | a55 | a56 | a57 | a61 |

+

| a62 | a63 | a64 | a65 | a66 | a67 |

+ +

References arm_q7_to_q15_reordered_no_shift(), and NN_ROUND.

+ +

Referenced by main().

+ +
+
+
+
+ + + + -- cgit