From 96d6da4e252b06dcfdc041e7df23e86161c33007 Mon Sep 17 00:00:00 2001
From: rihab kouki <rihab.kouki@st.com>
Date: Tue, 28 Jul 2020 11:24:49 +0100
Subject: Official ARM version: v5.6.0

---
 DSP/Include/arm_math.h | 3792 +++++++++++++++++++++++++-----------------------
 1 file changed, 1998 insertions(+), 1794 deletions(-)

(limited to 'DSP/Include/arm_math.h')
diff --git a/DSP/Include/arm_math.h b/DSP/Include/arm_math.h
index ea9dd26..eb37f82 100644
--- a/DSP/Include/arm_math.h
+++ b/DSP/Include/arm_math.h
@@ -1,11 +1,11 @@
 /******************************************************************************
  * @file     arm_math.h
- * @brief    Public header file for CMSIS DSP LibraryU
- * @version  V1.5.3
- * @date     10. January 2018
+ * @brief    Public header file for CMSIS DSP Library
+ * @version  V1.6.0
+ * @date     18. March 2019
  ******************************************************************************/
 /*
- * Copyright (c) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2010-2019 Arm Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -37,7 +37,7 @@
    * - Complex math functions
    * - Filters
    * - Matrix functions
-   * - Transforms
+   * - Transform functions
    * - Motor control functions
    * - Statistical functions
    * - Support functions
@@ -73,11 +73,7 @@
    * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
    * Simply include this file and link the appropriate library in the application and begin calling the library functions. The Library supports single
    * public header file <code> arm_math.h</code> for Cortex-M cores with little endian and big endian. Same header file will be used for floating point unit(FPU) variants.
-   * Define the appropriate preprocessor macro ARM_MATH_CM7 or ARM_MATH_CM4 or ARM_MATH_CM3 or
-   * ARM_MATH_CM0 or ARM_MATH_CM0PLUS depending on the target processor in the application.
-   * For Armv8-M cores define preprocessor macro ARM_MATH_ARMV8MBL or ARM_MATH_ARMV8MML.
-   * Set preprocessor macro __DSP_PRESENT if Armv8-M Mainline core supports DSP instructions.
-   * 
+   *
    *
    * Examples
    * --------
@@ -93,7 +89,7 @@
    * Building the Library
    * ------------
    *
-   * The library installer contains a project file to rebuild libraries on MDK toolchain in the <code>CMSIS\\DSP_Lib\\Source\\ARM</code> folder.
+   * The library installer contains a project file to rebuild libraries on MDK toolchain in the <code>CMSIS\\DSP\\Projects\\ARM</code> folder.
    * - arm_cortexM_math.uvprojx
    *
    *
@@ -104,10 +100,6 @@
    *
    * Each library project have different preprocessor macros.
    *
-   * - UNALIGNED_SUPPORT_DISABLE:
-   *
-   * Define macro UNALIGNED_SUPPORT_DISABLE, If the silicon does not support unaligned memory access
-   *
    * - ARM_MATH_BIG_ENDIAN:
    *
    * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
@@ -120,46 +112,41 @@
    *
    * Define macro ARM_MATH_ROUNDING for rounding on support functions
    *
-   * - ARM_MATH_CMx:
-   *
-   * Define macro ARM_MATH_CM4 for building the library on Cortex-M4 target, ARM_MATH_CM3 for building library on Cortex-M3 target
-   * and ARM_MATH_CM0 for building library on Cortex-M0 target, ARM_MATH_CM0PLUS for building library on Cortex-M0+ target, and
-   * ARM_MATH_CM7 for building the library on cortex-M7.
+   * - ARM_MATH_LOOPUNROLL:
    *
-   * - ARM_MATH_ARMV8MxL:
+   * Define macro ARM_MATH_LOOPUNROLL to enable manual loop unrolling in DSP functions
    *
-   * Define macro ARM_MATH_ARMV8MBL for building the library on Armv8-M Baseline target, ARM_MATH_ARMV8MML for building library
-   * on Armv8-M Mainline target.
+   * - ARM_MATH_NEON:
    *
-   * - __FPU_PRESENT:
+   * Define macro ARM_MATH_NEON to enable Neon versions of the DSP functions.
+   * It is not enabled by default when Neon is available because performance
+   * depends on the compiler and target architecture.
    *
-   * Initialize macro __FPU_PRESENT = 1 when building on FPU supported Targets. Enable this macro for floating point libraries.
+   * - ARM_MATH_NEON_EXPERIMENTAL:
    *
-   * - __DSP_PRESENT:
-   *
-   * Initialize macro __DSP_PRESENT = 1 when Armv8-M Mainline core supports DSP instructions.
+   * Define macro ARM_MATH_NEON_EXPERIMENTAL to enable experimental Neon versions
+   * of some DSP functions. Experimental Neon versions currently do not have better
+   * performance than the scalar versions.
    *
    * <hr>
    * CMSIS-DSP in ARM::CMSIS Pack
    * -----------------------------
    *
    * The following files relevant to CMSIS-DSP are present in the <b>ARM::CMSIS</b> Pack directories:
-   * |File/Folder                   |Content                                                                 |
-   * |------------------------------|------------------------------------------------------------------------|
-   * |\b CMSIS\\Documentation\\DSP  | This documentation                                                     |
-   * |\b CMSIS\\DSP_Lib             | Software license agreement (license.txt)                               |
-   * |\b CMSIS\\DSP_Lib\\Examples   | Example projects demonstrating the usage of the library functions      |
-   * |\b CMSIS\\DSP_Lib\\Source     | Source files for rebuilding the library                                |
+   * |File/Folder                      |Content                                                                 |
+   * |---------------------------------|------------------------------------------------------------------------|
+   * |\b CMSIS\\Documentation\\DSP     | This documentation                                                     |
+   * |\b CMSIS\\DSP\\DSP_Lib_TestSuite | DSP_Lib test suite                                                     |
+   * |\b CMSIS\\DSP\\Examples          | Example projects demonstrating the usage of the library functions      |
+   * |\b CMSIS\\DSP\\Include           | DSP_Lib include files                                                  |
+   * |\b CMSIS\\DSP\\Lib               | DSP_Lib binaries                                                       |
+   * |\b CMSIS\\DSP\\Projects          | Projects to rebuild DSP_Lib binaries                                   |
+   * |\b CMSIS\\DSP\\Source            | DSP_Lib source files                                                   |
    *
    * <hr>
    * Revision History of CMSIS-DSP
    * ------------
    * Please refer to \ref ChangeLog_pg.
-   *
-   * Copyright Notice
-   * ------------
-   *
-   * Copyright (C) 2010-2015 Arm Limited. All rights reserved.
    */
 
 
@@ -220,8 +207,8 @@
  * There is an associated initialization function for each type of matrix
  * data structure.
  * The initialization function sets the values of the internal structure fields.
- * Refer to the function <code>arm_mat_init_f32()</code>, <code>arm_mat_init_q31()</code>
- * and <code>arm_mat_init_q15()</code> for floating-point, Q31 and Q15 types,  respectively.
+ * Refer to \ref arm_mat_init_f32(), \ref arm_mat_init_q31() and \ref arm_mat_init_q15()
+ * for floating-point, Q31 and Q15 types,  respectively.
  *
  * \par
  * Use of the initialization function is optional. However, if initialization function is used
@@ -272,6 +259,7 @@
 /**
  * @defgroup groupStats Statistics Functions
  */
+
 /**
  * @defgroup groupSupport Support Functions
  */
@@ -286,6 +274,8 @@
 /**
  * @defgroup groupExamples Examples
  */
+
+
 #ifndef _ARM_MATH_H
 #define _ARM_MATH_H
 
@@ -295,10 +285,10 @@
 #elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
 
 #elif defined ( __GNUC__ )
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wsign-conversion"
-#pragma GCC diagnostic ignored "-Wconversion"
-#pragma GCC diagnostic ignored "-Wunused-parameter"
+  #pragma GCC diagnostic push
+  #pragma GCC diagnostic ignored "-Wsign-conversion"
+  #pragma GCC diagnostic ignored "-Wconversion"
+  #pragma GCC diagnostic ignored "-Wunused-parameter"
 
 #elif defined ( __ICCARM__ )
 
@@ -308,42 +298,40 @@
 
 #elif defined ( __TASKING__ )
 
+#elif defined ( _MSC_VER )
+
 #else
   #error Unknown compiler
 #endif
 
 
-#define __CMSIS_GENERIC         /* disable NVIC and Systick functions */
-
-#if defined(ARM_MATH_CM7)
-  #include "core_cm7.h"
-  #define ARM_MATH_DSP
-#elif defined (ARM_MATH_CM4)
-  #include "core_cm4.h"
-  #define ARM_MATH_DSP
-#elif defined (ARM_MATH_CM3)
-  #include "core_cm3.h"
-#elif defined (ARM_MATH_CM0)
-  #include "core_cm0.h"
-  #define ARM_MATH_CM0_FAMILY
-#elif defined (ARM_MATH_CM0PLUS)
-  #include "core_cm0plus.h"
-  #define ARM_MATH_CM0_FAMILY
-#elif defined (ARM_MATH_ARMV8MBL)
-  #include "core_armv8mbl.h"
-  #define ARM_MATH_CM0_FAMILY
-#elif defined (ARM_MATH_ARMV8MML)
-  #include "core_armv8mml.h"
-  #if (defined (__DSP_PRESENT) && (__DSP_PRESENT == 1))
-    #define ARM_MATH_DSP
-  #endif
+/* Included for intrinsics definitions */
+#if !defined ( _MSC_VER )
+#include "cmsis_compiler.h"
 #else
-  #error "Define according the used Cortex core ARM_MATH_CM7, ARM_MATH_CM4, ARM_MATH_CM3, ARM_MATH_CM0PLUS, ARM_MATH_CM0, ARM_MATH_ARMV8MBL, ARM_MATH_ARMV8MML"
+#include <stdint.h>
+#define __STATIC_FORCEINLINE static __forceinline
+#define __ALIGNED(x) __declspec(align(x))
+#define LOW_OPTIMIZATION_ENTER
+#define LOW_OPTIMIZATION_EXIT
+#define IAR_ONLY_LOW_OPTIMIZATION_ENTER 
+#define IAR_ONLY_LOW_OPTIMIZATION_EXIT
 #endif
 
-#undef  __CMSIS_GENERIC         /* enable NVIC and Systick functions */
 #include "string.h"
 #include "math.h"
+#include "float.h"
+
+/* evaluate ARM DSP feature */
+#if (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
+  #define ARM_MATH_DSP                   1
+#endif
+
+#if defined(__ARM_NEON)
+#include <arm_neon.h>
+#endif
+
+
 #ifdef   __cplusplus
 extern "C"
 {
@@ -379,18 +367,6 @@ extern "C"
   /* -1 to +1 is divided into 360 values so total spacing is (2/360) */
 #define INPUT_SPACING         0xB60B61
 
-  /**
-   * @brief Macro for Unaligned Support
-   */
-#ifndef UNALIGNED_SUPPORT_DISABLE
-    #define ALIGN4
-#else
-  #if defined  (__GNUC__)
-    #define ALIGN4 __attribute__((aligned(4)))
-  #else
-    #define ALIGN4 __align(4)
-  #endif
-#endif   /* #ifndef UNALIGNED_SUPPORT_DISABLE */
 
   /**
    * @brief Error status returned by some functions in the library.
@@ -398,13 +374,13 @@ extern "C"
 
   typedef enum
   {
-    ARM_MATH_SUCCESS = 0,                /**< No error */
+    ARM_MATH_SUCCESS        =  0,        /**< No error */
     ARM_MATH_ARGUMENT_ERROR = -1,        /**< One or more arguments are incorrect */
-    ARM_MATH_LENGTH_ERROR = -2,          /**< Length of data buffer is incorrect */
-    ARM_MATH_SIZE_MISMATCH = -3,         /**< Size of matrices is not compatible with the operation. */
-    ARM_MATH_NANINF = -4,                /**< Not-a-number (NaN) or infinity is generated */
-    ARM_MATH_SINGULAR = -5,              /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
-    ARM_MATH_TEST_FAILURE = -6           /**< Test Failed  */
+    ARM_MATH_LENGTH_ERROR   = -2,        /**< Length of data buffer is incorrect */
+    ARM_MATH_SIZE_MISMATCH  = -3,        /**< Size of matrices is not compatible with the operation */
+    ARM_MATH_NANINF         = -4,        /**< Not-a-number (NaN) or infinity is generated */
+    ARM_MATH_SINGULAR       = -5,        /**< Input matrix is singular and cannot be inverted */
+    ARM_MATH_TEST_FAILURE   = -6         /**< Test Failed */
   } arm_status;
 
   /**
@@ -437,87 +413,261 @@ extern "C"
    */
   typedef double float64_t;
 
-  /**
-   * @brief definition to read/write two 16 bit values.
-   */
+
+/**
+  @brief definition to read/write two 16 bit values.
+  @deprecated
+ */
 #if   defined ( __CC_ARM )
   #define __SIMD32_TYPE int32_t __packed
-  #define CMSIS_UNUSED __attribute__((unused))
-  #define CMSIS_INLINE __attribute__((always_inline))
-
 #elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
   #define __SIMD32_TYPE int32_t
-  #define CMSIS_UNUSED __attribute__((unused))
-  #define CMSIS_INLINE __attribute__((always_inline))
-
 #elif defined ( __GNUC__ )
   #define __SIMD32_TYPE int32_t
-  #define CMSIS_UNUSED __attribute__((unused))
-  #define CMSIS_INLINE __attribute__((always_inline))
-
 #elif defined ( __ICCARM__ )
   #define __SIMD32_TYPE int32_t __packed
-  #define CMSIS_UNUSED
-  #define CMSIS_INLINE
-
 #elif defined ( __TI_ARM__ )
   #define __SIMD32_TYPE int32_t
-  #define CMSIS_UNUSED __attribute__((unused))
-  #define CMSIS_INLINE
-
 #elif defined ( __CSMC__ )
   #define __SIMD32_TYPE int32_t
-  #define CMSIS_UNUSED
-  #define CMSIS_INLINE
-
 #elif defined ( __TASKING__ )
-  #define __SIMD32_TYPE __unaligned int32_t
-  #define CMSIS_UNUSED
-  #define CMSIS_INLINE
-
+  #define __SIMD32_TYPE __unaligned int32_t
+#elif defined(_MSC_VER )
+  #define __SIMD32_TYPE int32_t
 #else
   #error Unknown compiler
 #endif
 
 #define __SIMD32(addr)        (*(__SIMD32_TYPE **) & (addr))
-#define __SIMD32_CONST(addr)  ((__SIMD32_TYPE *)(addr))
-#define _SIMD32_OFFSET(addr)  (*(__SIMD32_TYPE *)  (addr))
-#define __SIMD64(addr)        (*(int64_t **) & (addr))
+#define __SIMD32_CONST(addr)  ( (__SIMD32_TYPE * )   (addr))
+#define _SIMD32_OFFSET(addr)  (*(__SIMD32_TYPE * )   (addr))
+#define __SIMD64(addr)        (*(      int64_t **) & (addr))
 
-#if !defined (ARM_MATH_DSP)
+/* SIMD replacement */
+
+
+/**
+  @brief         Read 2 Q15 from Q15 pointer.
+  @param[in]     pQ15      points to input value
+  @return        Q31 value
+ */
+__STATIC_FORCEINLINE q31_t read_q15x2 (
+  q15_t * pQ15)
+{
+  q31_t val;
+
+  memcpy (&val, pQ15, 4);
+
+  return (val);
+}
+
+/**
+  @brief         Read 2 Q15 from Q15 pointer and increment pointer afterwards.
+  @param[in]     pQ15      points to input value
+  @return        Q31 value
+ */
+__STATIC_FORCEINLINE q31_t read_q15x2_ia (
+  q15_t ** pQ15)
+{
+  q31_t val;
+
+  memcpy (&val, *pQ15, 4);
+  *pQ15 += 2;
+
+  return (val);
+}
+
+/**
+  @brief         Read 2 Q15 from Q15 pointer and decrement pointer afterwards.
+  @param[in]     pQ15      points to input value
+  @return        Q31 value
+ */
+__STATIC_FORCEINLINE q31_t read_q15x2_da (
+  q15_t ** pQ15)
+{
+  q31_t val;
+
+  memcpy (&val, *pQ15, 4);
+  *pQ15 -= 2;
+
+  return (val);
+}
+
+/**
+  @brief         Write 2 Q15 to Q15 pointer and increment pointer afterwards.
+  @param[in]     pQ15      points to output value
+  @param[in]     value     Q31 value
+  @return        none
+ */
+__STATIC_FORCEINLINE void write_q15x2_ia (
+  q15_t ** pQ15,
+  q31_t    value)
+{
+  q31_t val = value;
+
+  memcpy (*pQ15, &val, 4);
+  *pQ15 += 2;
+}
+
+/**
+  @brief         Write 2 Q15 to Q15 pointer.
+  @param[in]     pQ15      points to output value
+  @param[in]     value     Q31 value
+  @return        none
+ */
+__STATIC_FORCEINLINE void write_q15x2 (
+  q15_t * pQ15,
+  q31_t   value)
+{
+  q31_t val = value;
+
+  memcpy (pQ15, &val, 4);
+}
+
+
+/**
+  @brief         Read 4 Q7 from Q7 pointer and increment pointer afterwards.
+  @param[in]     pQ7       points to input value
+  @return        Q31 value
+ */
+__STATIC_FORCEINLINE q31_t read_q7x4_ia (
+  q7_t ** pQ7)
+{
+  q31_t val;
+
+  memcpy (&val, *pQ7, 4);
+  *pQ7 += 4;
+
+  return (val);
+}
+
+/**
+  @brief         Read 4 Q7 from Q7 pointer and decrement pointer afterwards.
+  @param[in]     pQ7       points to input value
+  @return        Q31 value
+ */
+__STATIC_FORCEINLINE q31_t read_q7x4_da (
+  q7_t ** pQ7)
+{
+  q31_t val;
+
+  memcpy (&val, *pQ7, 4);
+  *pQ7 -= 4;
+
+  return (val);
+}
+
+/**
+  @brief         Write 4 Q7 to Q7 pointer and increment pointer afterwards.
+  @param[in]     pQ7       points to output value
+  @param[in]     value     Q31 value
+  @return        none
+ */
+__STATIC_FORCEINLINE void write_q7x4_ia (
+  q7_t ** pQ7,
+  q31_t   value)
+{
+  q31_t val = value;
+
+  memcpy (*pQ7, &val, 4);
+  *pQ7 += 4;
+}
+
+/*
+
+Normally those kind of definitions are in a compiler file
+in Core or Core_A.
+
+But for MSVC compiler it is a bit special. The goal is very specific
+to CMSIS-DSP and only to allow the use of this library from other
+systems like Python or Matlab.
+
+MSVC is not going to be used to cross-compile to ARM. So, having a MSVC
+compiler file in Core or Core_A would not make sense.
+
+*/
+#if defined ( _MSC_VER )
+    __STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data)
+    {
+      if (data == 0U) { return 32U; }
+
+      uint32_t count = 0U;
+      uint32_t mask = 0x80000000U;
+
+      while ((data & mask) == 0U)
+      {
+        count += 1U;
+        mask = mask >> 1U;
+      }
+      return count;
+    }
+
+  __STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat)
+  {
+    if ((sat >= 1U) && (sat <= 32U))
+    {
+      const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U);
+      const int32_t min = -1 - max ;
+      if (val > max)
+      {
+        return max;
+      }
+      else if (val < min)
+      {
+        return min;
+      }
+    }
+    return val;
+  }
+
+  __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat)
+  {
+    if (sat <= 31U)
+    {
+      const uint32_t max = ((1U << sat) - 1U);
+      if (val > (int32_t)max)
+      {
+        return max;
+      }
+      else if (val < 0)
+      {
+        return 0U;
+      }
+    }
+    return (uint32_t)val;
+  }
+#endif
+
+#ifndef ARM_MATH_DSP
   /**
    * @brief definition to pack two 16 bit values.
    */
-#define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<    0) & (int32_t)0x0000FFFF) | \
-                                    (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000)  )
-#define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<    0) & (int32_t)0xFFFF0000) | \
-                                    (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF)  )
-
-#endif /* !defined (ARM_MATH_DSP) */
+  #define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<    0) & (int32_t)0x0000FFFF) | \
+                                      (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000)  )
+  #define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<    0) & (int32_t)0xFFFF0000) | \
+                                      (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF)  )
+#endif
 
    /**
    * @brief definition to pack four 8 bit values.
    */
 #ifndef ARM_MATH_BIG_ENDIAN
-
-#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) | \
-                                (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) | \
-                                (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \
-                                (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
+  #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) | \
+                                  (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) | \
+                                  (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \
+                                  (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
 #else
-
-#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) | \
-                                (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) | \
-                                (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \
-                                (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )
-
+  #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) | \
+                                  (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) | \
+                                  (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \
+                                  (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )
 #endif
 
 
   /**
    * @brief Clips Q63 to Q31 values.
    */
-  CMSIS_INLINE __STATIC_INLINE q31_t clip_q63_to_q31(
+  __STATIC_FORCEINLINE q31_t clip_q63_to_q31(
   q63_t x)
   {
     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
@@ -527,7 +677,7 @@ extern "C"
   /**
    * @brief Clips Q63 to Q15 values.
    */
-  CMSIS_INLINE __STATIC_INLINE q15_t clip_q63_to_q15(
+  __STATIC_FORCEINLINE q15_t clip_q63_to_q15(
   q63_t x)
   {
     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
@@ -537,7 +687,7 @@ extern "C"
   /**
    * @brief Clips Q31 to Q7 values.
    */
-  CMSIS_INLINE __STATIC_INLINE q7_t clip_q31_to_q7(
+  __STATIC_FORCEINLINE q7_t clip_q31_to_q7(
   q31_t x)
   {
     return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
@@ -547,7 +697,7 @@ extern "C"
   /**
    * @brief Clips Q31 to Q15 values.
    */
-  CMSIS_INLINE __STATIC_INLINE q15_t clip_q31_to_q15(
+  __STATIC_FORCEINLINE q15_t clip_q31_to_q15(
   q31_t x)
   {
     return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
@@ -557,23 +707,21 @@ extern "C"
   /**
    * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
    */
-
-  CMSIS_INLINE __STATIC_INLINE q63_t mult32x64(
+  __STATIC_FORCEINLINE q63_t mult32x64(
   q63_t x,
   q31_t y)
   {
     return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
-            (((q63_t) (x >> 32) * y)));
+            (((q63_t) (x >> 32)                * y)      )  );
   }
 
   /**
    * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type.
    */
-
-  CMSIS_INLINE __STATIC_INLINE uint32_t arm_recip_q31(
-  q31_t in,
-  q31_t * dst,
-  q31_t * pRecipTable)
+  __STATIC_FORCEINLINE uint32_t arm_recip_q31(
+        q31_t in,
+        q31_t * dst,
+  const q31_t * pRecipTable)
   {
     q31_t out;
     uint32_t tempVal;
@@ -621,10 +769,10 @@ extern "C"
   /**
    * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type.
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t arm_recip_q15(
-  q15_t in,
-  q15_t * dst,
-  q15_t * pRecipTable)
+  __STATIC_FORCEINLINE uint32_t arm_recip_q15(
+        q15_t in,
+        q15_t * dst,
+  const q15_t * pRecipTable)
   {
     q15_t out = 0;
     uint32_t tempVal = 0;
@@ -668,16 +816,55 @@ extern "C"
     return (signBits + 1);
   }
 
+#if defined(ARM_MATH_NEON)
+
+static inline float32x4_t __arm_vec_sqrt_f32_neon(float32x4_t  x)
+{
+    float32x4_t x1 = vmaxq_f32(x, vdupq_n_f32(FLT_MIN));
+    float32x4_t e = vrsqrteq_f32(x1);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e);
+    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e);
+    return vmulq_f32(x, e);
+}
+
+static inline int16x8_t __arm_vec_sqrt_q15_neon(int16x8_t vec)
+{
+    float32x4_t tempF;
+    int32x4_t tempHI,tempLO;
+
+    tempLO = vmovl_s16(vget_low_s16(vec));
+    tempF = vcvtq_n_f32_s32(tempLO,15);
+    tempF = __arm_vec_sqrt_f32_neon(tempF);
+    tempLO = vcvtq_n_s32_f32(tempF,15);
+
+    tempHI = vmovl_s16(vget_high_s16(vec));
+    tempF = vcvtq_n_f32_s32(tempHI,15);
+    tempF = __arm_vec_sqrt_f32_neon(tempF);
+    tempHI = vcvtq_n_s32_f32(tempF,15);
+
+    return(vcombine_s16(vqmovn_s32(tempLO),vqmovn_s32(tempHI)));
+}
+
+static inline int32x4_t __arm_vec_sqrt_q31_neon(int32x4_t vec)
+{
+  float32x4_t temp;
+
+  temp = vcvtq_n_f32_s32(vec,31);
+  temp = __arm_vec_sqrt_f32_neon(temp);
+  return(vcvtq_n_s32_f32(temp,31));
+}
+
+#endif
 
 /*
- * @brief C custom defined intrinsic function for M3 and M0 processors
+ * @brief C custom defined intrinsic functions
  */
 #if !defined (ARM_MATH_DSP)
 
   /*
-   * @brief C custom defined QADD8 for M3 and M0 processors
+   * @brief C custom defined QADD8
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __QADD8(
+  __STATIC_FORCEINLINE uint32_t __QADD8(
   uint32_t x,
   uint32_t y)
   {
@@ -693,9 +880,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined QSUB8 for M3 and M0 processors
+   * @brief C custom defined QSUB8
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __QSUB8(
+  __STATIC_FORCEINLINE uint32_t __QSUB8(
   uint32_t x,
   uint32_t y)
   {
@@ -711,9 +898,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined QADD16 for M3 and M0 processors
+   * @brief C custom defined QADD16
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __QADD16(
+  __STATIC_FORCEINLINE uint32_t __QADD16(
   uint32_t x,
   uint32_t y)
   {
@@ -728,9 +915,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined SHADD16 for M3 and M0 processors
+   * @brief C custom defined SHADD16
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __SHADD16(
+  __STATIC_FORCEINLINE uint32_t __SHADD16(
   uint32_t x,
   uint32_t y)
   {
@@ -744,9 +931,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined QSUB16 for M3 and M0 processors
+   * @brief C custom defined QSUB16
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __QSUB16(
+  __STATIC_FORCEINLINE uint32_t __QSUB16(
   uint32_t x,
   uint32_t y)
   {
@@ -760,9 +947,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined SHSUB16 for M3 and M0 processors
+   * @brief C custom defined SHSUB16
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __SHSUB16(
+  __STATIC_FORCEINLINE uint32_t __SHSUB16(
   uint32_t x,
   uint32_t y)
   {
@@ -776,9 +963,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined QASX for M3 and M0 processors
+   * @brief C custom defined QASX
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __QASX(
+  __STATIC_FORCEINLINE uint32_t __QASX(
   uint32_t x,
   uint32_t y)
   {
@@ -792,9 +979,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined SHASX for M3 and M0 processors
+   * @brief C custom defined SHASX
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __SHASX(
+  __STATIC_FORCEINLINE uint32_t __SHASX(
   uint32_t x,
   uint32_t y)
   {
@@ -808,9 +995,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined QSAX for M3 and M0 processors
+   * @brief C custom defined QSAX
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __QSAX(
+  __STATIC_FORCEINLINE uint32_t __QSAX(
   uint32_t x,
   uint32_t y)
   {
@@ -824,9 +1011,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined SHSAX for M3 and M0 processors
+   * @brief C custom defined SHSAX
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __SHSAX(
+  __STATIC_FORCEINLINE uint32_t __SHSAX(
   uint32_t x,
   uint32_t y)
   {
@@ -840,9 +1027,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined SMUSDX for M3 and M0 processors
+   * @brief C custom defined SMUSDX
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __SMUSDX(
+  __STATIC_FORCEINLINE uint32_t __SMUSDX(
   uint32_t x,
   uint32_t y)
   {
@@ -851,9 +1038,9 @@ extern "C"
   }
 
   /*
-   * @brief C custom defined SMUADX for M3 and M0 processors
+   * @brief C custom defined SMUADX
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __SMUADX(
+  __STATIC_FORCEINLINE uint32_t __SMUADX(
   uint32_t x,
   uint32_t y)
   {
@@ -863,9 +1050,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined QADD for M3 and M0 processors
+   * @brief C custom defined QADD
    */
-  CMSIS_INLINE __STATIC_INLINE int32_t __QADD(
+  __STATIC_FORCEINLINE int32_t __QADD(
   int32_t x,
   int32_t y)
   {
@@ -874,9 +1061,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined QSUB for M3 and M0 processors
+   * @brief C custom defined QSUB
    */
-  CMSIS_INLINE __STATIC_INLINE int32_t __QSUB(
+  __STATIC_FORCEINLINE int32_t __QSUB(
   int32_t x,
   int32_t y)
   {
@@ -885,9 +1072,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined SMLAD for M3 and M0 processors
+   * @brief C custom defined SMLAD
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __SMLAD(
+  __STATIC_FORCEINLINE uint32_t __SMLAD(
   uint32_t x,
   uint32_t y,
   uint32_t sum)
@@ -899,9 +1086,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined SMLADX for M3 and M0 processors
+   * @brief C custom defined SMLADX
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __SMLADX(
+  __STATIC_FORCEINLINE uint32_t __SMLADX(
   uint32_t x,
   uint32_t y,
   uint32_t sum)
@@ -913,9 +1100,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined SMLSDX for M3 and M0 processors
+   * @brief C custom defined SMLSDX
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __SMLSDX(
+  __STATIC_FORCEINLINE uint32_t __SMLSDX(
   uint32_t x,
   uint32_t y,
   uint32_t sum)
@@ -927,9 +1114,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined SMLALD for M3 and M0 processors
+   * @brief C custom defined SMLALD
    */
-  CMSIS_INLINE __STATIC_INLINE uint64_t __SMLALD(
+  __STATIC_FORCEINLINE uint64_t __SMLALD(
   uint32_t x,
   uint32_t y,
   uint64_t sum)
@@ -942,9 +1129,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined SMLALDX for M3 and M0 processors
+   * @brief C custom defined SMLALDX
    */
-  CMSIS_INLINE __STATIC_INLINE uint64_t __SMLALDX(
+  __STATIC_FORCEINLINE uint64_t __SMLALDX(
   uint32_t x,
   uint32_t y,
   uint64_t sum)
@@ -957,9 +1144,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined SMUAD for M3 and M0 processors
+   * @brief C custom defined SMUAD
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __SMUAD(
+  __STATIC_FORCEINLINE uint32_t __SMUAD(
   uint32_t x,
   uint32_t y)
   {
@@ -969,9 +1156,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined SMUSD for M3 and M0 processors
+   * @brief C custom defined SMUSD
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __SMUSD(
+  __STATIC_FORCEINLINE uint32_t __SMUSD(
   uint32_t x,
   uint32_t y)
   {
@@ -981,9 +1168,9 @@ extern "C"
 
 
   /*
-   * @brief C custom defined SXTB16 for M3 and M0 processors
+   * @brief C custom defined SXTB16
    */
-  CMSIS_INLINE __STATIC_INLINE uint32_t __SXTB16(
+  __STATIC_FORCEINLINE uint32_t __SXTB16(
   uint32_t x)
   {
     return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) |
@@ -991,9 +1178,9 @@ extern "C"
   }
 
   /*
-   * @brief C custom defined SMMLA for M3 and M0 processors
+   * @brief C custom defined SMMLA
    */
-  CMSIS_INLINE __STATIC_INLINE int32_t __SMMLA(
+  __STATIC_FORCEINLINE int32_t __SMMLA(
   int32_t x,
   int32_t y,
   int32_t sum)
@@ -1009,9 +1196,9 @@ extern "C"
    */
   typedef struct
   {
-    uint16_t numTaps;        /**< number of filter coefficients in the filter. */
-    q7_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    q7_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
+          uint16_t numTaps;        /**< number of filter coefficients in the filter. */
+          q7_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const q7_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
   } arm_fir_instance_q7;
 
   /**
@@ -1019,9 +1206,9 @@ extern "C"
    */
   typedef struct
   {
-    uint16_t numTaps;         /**< number of filter coefficients in the filter. */
-    q15_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    q15_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
+          uint16_t numTaps;         /**< number of filter coefficients in the filter. */
+          q15_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const q15_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
   } arm_fir_instance_q15;
 
   /**
@@ -1029,9 +1216,9 @@ extern "C"
    */
   typedef struct
   {
-    uint16_t numTaps;         /**< number of filter coefficients in the filter. */
-    q31_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    q31_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
+          uint16_t numTaps;         /**< number of filter coefficients in the filter. */
+          q31_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const q31_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
   } arm_fir_instance_q31;
 
   /**
@@ -1039,12 +1226,11 @@ extern "C"
    */
   typedef struct
   {
-    uint16_t numTaps;     /**< number of filter coefficients in the filter. */
-    float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
+          uint16_t numTaps;     /**< number of filter coefficients in the filter. */
+          float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    const float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
   } arm_fir_instance_f32;
 
-
   /**
    * @brief Processing function for the Q7 FIR filter.
    * @param[in]  S          points to an instance of the Q7 FIR filter structure.
@@ -1054,10 +1240,9 @@ extern "C"
    */
   void arm_fir_q7(
   const arm_fir_instance_q7 * S,
-  q7_t * pSrc,
-  q7_t * pDst,
-  uint32_t blockSize);
-
+  const q7_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
 
   /**
    * @brief  Initialization function for the Q7 FIR filter.
@@ -1068,12 +1253,11 @@ extern "C"
    * @param[in]     blockSize  number of samples that are processed.
    */
   void arm_fir_init_q7(
-  arm_fir_instance_q7 * S,
-  uint16_t numTaps,
-  q7_t * pCoeffs,
-  q7_t * pState,
-  uint32_t blockSize);
-
+        arm_fir_instance_q7 * S,
+        uint16_t numTaps,
+  const q7_t * pCoeffs,
+        q7_t * pState,
+        uint32_t blockSize);
 
   /**
    * @brief Processing function for the Q15 FIR filter.
@@ -1084,13 +1268,12 @@ extern "C"
    */
   void arm_fir_q15(
   const arm_fir_instance_q15 * S,
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
-
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
   /**
-   * @brief Processing function for the fast Q15 FIR filter for Cortex-M3 and Cortex-M4.
+   * @brief Processing function for the Q15 FIR filter (fast version).
    * @param[in]  S          points to an instance of the Q15 FIR filter structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
@@ -1098,10 +1281,9 @@ extern "C"
    */
   void arm_fir_fast_q15(
   const arm_fir_instance_q15 * S,
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
-
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
   /**
    * @brief  Initialization function for the Q15 FIR filter.
@@ -1110,16 +1292,16 @@ extern "C"
    * @param[in]     pCoeffs    points to the filter coefficients.
    * @param[in]     pState     points to the state buffer.
    * @param[in]     blockSize  number of samples that are processed at a time.
-   * @return The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_ARGUMENT_ERROR if
-   * <code>numTaps</code> is not a supported value.
+   * @return     The function returns either
+   * <code>ARM_MATH_SUCCESS</code> if initialization was successful or
+   * <code>ARM_MATH_ARGUMENT_ERROR</code> if <code>numTaps</code> is not a supported value.
    */
   arm_status arm_fir_init_q15(
-  arm_fir_instance_q15 * S,
-  uint16_t numTaps,
-  q15_t * pCoeffs,
-  q15_t * pState,
-  uint32_t blockSize);
-
+        arm_fir_instance_q15 * S,
+        uint16_t numTaps,
+  const q15_t * pCoeffs,
+        q15_t * pState,
+        uint32_t blockSize);
 
   /**
    * @brief Processing function for the Q31 FIR filter.
@@ -1130,24 +1312,22 @@ extern "C"
    */
   void arm_fir_q31(
   const arm_fir_instance_q31 * S,
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
-
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
   /**
-   * @brief Processing function for the fast Q31 FIR filter for Cortex-M3 and Cortex-M4.
-   * @param[in]  S          points to an instance of the Q31 FIR structure.
+   * @brief Processing function for the Q31 FIR filter (fast version).
+   * @param[in]  S          points to an instance of the Q31 FIR filter structure.
    * @param[in]  pSrc       points to the block of input data.
    * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
    */
   void arm_fir_fast_q31(
   const arm_fir_instance_q31 * S,
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
-
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
   /**
    * @brief  Initialization function for the Q31 FIR filter.
@@ -1158,12 +1338,11 @@ extern "C"
    * @param[in]     blockSize  number of samples that are processed at a time.
    */
   void arm_fir_init_q31(
-  arm_fir_instance_q31 * S,
-  uint16_t numTaps,
-  q31_t * pCoeffs,
-  q31_t * pState,
-  uint32_t blockSize);
-
+        arm_fir_instance_q31 * S,
+        uint16_t numTaps,
+  const q31_t * pCoeffs,
+        q31_t * pState,
+        uint32_t blockSize);
 
   /**
    * @brief Processing function for the floating-point FIR filter.
@@ -1174,10 +1353,9 @@ extern "C"
    */
   void arm_fir_f32(
   const arm_fir_instance_f32 * S,
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
-
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
   /**
    * @brief  Initialization function for the floating-point FIR filter.
@@ -1188,22 +1366,21 @@ extern "C"
    * @param[in]     blockSize  number of samples that are processed at a time.
    */
   void arm_fir_init_f32(
-  arm_fir_instance_f32 * S,
-  uint16_t numTaps,
-  float32_t * pCoeffs,
-  float32_t * pState,
-  uint32_t blockSize);
-
+        arm_fir_instance_f32 * S,
+        uint16_t numTaps,
+  const float32_t * pCoeffs,
+        float32_t * pState,
+        uint32_t blockSize);
 
   /**
    * @brief Instance structure for the Q15 Biquad cascade filter.
    */
   typedef struct
   {
-    int8_t numStages;        /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    q15_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
-    q15_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
-    int8_t postShift;        /**< Additional shift, in bits, applied to each output sample. */
+          int8_t numStages;        /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          q15_t *pState;           /**< Points to the array of state variables.  The array is of length 4*numStages. */
+    const q15_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
+          int8_t postShift;        /**< Additional shift, in bits, applied to each output sample. */
   } arm_biquad_casd_df1_inst_q15;
 
   /**
@@ -1211,10 +1388,10 @@ extern "C"
    */
   typedef struct
   {
-    uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    q31_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
-    q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
-    uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. */
+          uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          q31_t *pState;           /**< Points to the array of state variables.  The array is of length 4*numStages. */
+    const q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
+          uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. */
   } arm_biquad_casd_df1_inst_q31;
 
   /**
@@ -1222,12 +1399,11 @@ extern "C"
    */
   typedef struct
   {
-    uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    float32_t *pState;       /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
-    float32_t *pCoeffs;      /**< Points to the array of coefficients.  The array is of length 5*numStages. */
+          uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float32_t *pState;       /**< Points to the array of state variables.  The array is of length 4*numStages. */
+    const float32_t *pCoeffs;      /**< Points to the array of coefficients.  The array is of length 5*numStages. */
   } arm_biquad_casd_df1_inst_f32;
 
-
   /**
    * @brief Processing function for the Q15 Biquad cascade filter.
    * @param[in]  S          points to an instance of the Q15 Biquad cascade structure.
@@ -1237,10 +1413,9 @@ extern "C"
    */
   void arm_biquad_cascade_df1_q15(
   const arm_biquad_casd_df1_inst_q15 * S,
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
-
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
   /**
    * @brief  Initialization function for the Q15 Biquad cascade filter.
@@ -1251,12 +1426,11 @@ extern "C"
    * @param[in]     postShift  Shift to be applied to the output. Varies according to the coefficients format
    */
   void arm_biquad_cascade_df1_init_q15(
-  arm_biquad_casd_df1_inst_q15 * S,
-  uint8_t numStages,
-  q15_t * pCoeffs,
-  q15_t * pState,
-  int8_t postShift);
-
+        arm_biquad_casd_df1_inst_q15 * S,
+        uint8_t numStages,
+  const q15_t * pCoeffs,
+        q15_t * pState,
+        int8_t postShift);
 
   /**
    * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4.
@@ -1267,10 +1441,9 @@ extern "C"
    */
   void arm_biquad_cascade_df1_fast_q15(
   const arm_biquad_casd_df1_inst_q15 * S,
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
-
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
   /**
    * @brief Processing function for the Q31 Biquad cascade filter
@@ -1281,10 +1454,9 @@ extern "C"
    */
   void arm_biquad_cascade_df1_q31(
   const arm_biquad_casd_df1_inst_q31 * S,
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
-
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
   /**
    * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4.
@@ -1295,10 +1467,9 @@ extern "C"
    */
   void arm_biquad_cascade_df1_fast_q31(
   const arm_biquad_casd_df1_inst_q31 * S,
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
-
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
   /**
    * @brief  Initialization function for the Q31 Biquad cascade filter.
@@ -1309,12 +1480,11 @@ extern "C"
    * @param[in]     postShift  Shift to be applied to the output. Varies according to the coefficients format
    */
   void arm_biquad_cascade_df1_init_q31(
-  arm_biquad_casd_df1_inst_q31 * S,
-  uint8_t numStages,
-  q31_t * pCoeffs,
-  q31_t * pState,
-  int8_t postShift);
-
+        arm_biquad_casd_df1_inst_q31 * S,
+        uint8_t numStages,
+  const q31_t * pCoeffs,
+        q31_t * pState,
+        int8_t postShift);
 
   /**
    * @brief Processing function for the floating-point Biquad cascade filter.
@@ -1325,10 +1495,9 @@ extern "C"
    */
   void arm_biquad_cascade_df1_f32(
   const arm_biquad_casd_df1_inst_f32 * S,
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
-
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
   /**
    * @brief  Initialization function for the floating-point Biquad cascade filter.
@@ -1338,11 +1507,10 @@ extern "C"
    * @param[in]     pState     points to the state buffer.
    */
   void arm_biquad_cascade_df1_init_f32(
-  arm_biquad_casd_df1_inst_f32 * S,
-  uint8_t numStages,
-  float32_t * pCoeffs,
-  float32_t * pState);
-
+        arm_biquad_casd_df1_inst_f32 * S,
+        uint8_t numStages,
+  const float32_t * pCoeffs,
+        float32_t * pState);
 
   /**
    * @brief Instance structure for the floating-point matrix structure.
@@ -1385,7 +1553,6 @@ extern "C"
     q31_t *pData;         /**< points to the data of the matrix. */
   } arm_matrix_instance_q31;
 
-
   /**
    * @brief Floating-point matrix addition.
    * @param[in]  pSrcA  points to the first input matrix structure
@@ -1394,11 +1561,10 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_add_f32(
+arm_status arm_mat_add_f32(
   const arm_matrix_instance_f32 * pSrcA,
   const arm_matrix_instance_f32 * pSrcB,
-  arm_matrix_instance_f32 * pDst);
-
+        arm_matrix_instance_f32 * pDst);
 
   /**
    * @brief Q15 matrix addition.
@@ -1408,11 +1574,10 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_add_q15(
+arm_status arm_mat_add_q15(
   const arm_matrix_instance_q15 * pSrcA,
   const arm_matrix_instance_q15 * pSrcB,
-  arm_matrix_instance_q15 * pDst);
-
+        arm_matrix_instance_q15 * pDst);
 
   /**
    * @brief Q31 matrix addition.
@@ -1422,11 +1587,10 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_add_q31(
+arm_status arm_mat_add_q31(
   const arm_matrix_instance_q31 * pSrcA,
   const arm_matrix_instance_q31 * pSrcB,
-  arm_matrix_instance_q31 * pDst);
-
+        arm_matrix_instance_q31 * pDst);
 
   /**
    * @brief Floating-point, complex, matrix multiplication.
@@ -1436,11 +1600,10 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_cmplx_mult_f32(
+arm_status arm_mat_cmplx_mult_f32(
   const arm_matrix_instance_f32 * pSrcA,
   const arm_matrix_instance_f32 * pSrcB,
-  arm_matrix_instance_f32 * pDst);
-
+        arm_matrix_instance_f32 * pDst);
 
   /**
    * @brief Q15, complex,  matrix multiplication.
@@ -1450,12 +1613,11 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_cmplx_mult_q15(
+arm_status arm_mat_cmplx_mult_q15(
   const arm_matrix_instance_q15 * pSrcA,
   const arm_matrix_instance_q15 * pSrcB,
-  arm_matrix_instance_q15 * pDst,
-  q15_t * pScratch);
-
+        arm_matrix_instance_q15 * pDst,
+        q15_t * pScratch);
 
   /**
    * @brief Q31, complex, matrix multiplication.
@@ -1465,11 +1627,10 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_cmplx_mult_q31(
+arm_status arm_mat_cmplx_mult_q31(
   const arm_matrix_instance_q31 * pSrcA,
   const arm_matrix_instance_q31 * pSrcB,
-  arm_matrix_instance_q31 * pDst);
-
+        arm_matrix_instance_q31 * pDst);
 
   /**
    * @brief Floating-point matrix transpose.
@@ -1478,10 +1639,9 @@ extern "C"
    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_trans_f32(
+arm_status arm_mat_trans_f32(
   const arm_matrix_instance_f32 * pSrc,
-  arm_matrix_instance_f32 * pDst);
-
+        arm_matrix_instance_f32 * pDst);
 
   /**
    * @brief Q15 matrix transpose.
@@ -1490,10 +1650,9 @@ extern "C"
    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_trans_q15(
+arm_status arm_mat_trans_q15(
   const arm_matrix_instance_q15 * pSrc,
-  arm_matrix_instance_q15 * pDst);
-
+        arm_matrix_instance_q15 * pDst);
 
   /**
    * @brief Q31 matrix transpose.
@@ -1502,10 +1661,9 @@ extern "C"
    * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_trans_q31(
+arm_status arm_mat_trans_q31(
   const arm_matrix_instance_q31 * pSrc,
-  arm_matrix_instance_q31 * pDst);
-
+        arm_matrix_instance_q31 * pDst);
 
   /**
    * @brief Floating-point matrix multiplication
@@ -1515,11 +1673,10 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_mult_f32(
+arm_status arm_mat_mult_f32(
   const arm_matrix_instance_f32 * pSrcA,
   const arm_matrix_instance_f32 * pSrcB,
-  arm_matrix_instance_f32 * pDst);
-
+        arm_matrix_instance_f32 * pDst);
 
   /**
    * @brief Q15 matrix multiplication
@@ -1530,12 +1687,11 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_mult_q15(
+arm_status arm_mat_mult_q15(
   const arm_matrix_instance_q15 * pSrcA,
   const arm_matrix_instance_q15 * pSrcB,
-  arm_matrix_instance_q15 * pDst,
-  q15_t * pState);
-
+        arm_matrix_instance_q15 * pDst,
+        q15_t * pState);
 
   /**
    * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
@@ -1546,12 +1702,11 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_mult_fast_q15(
+arm_status arm_mat_mult_fast_q15(
   const arm_matrix_instance_q15 * pSrcA,
   const arm_matrix_instance_q15 * pSrcB,
-  arm_matrix_instance_q15 * pDst,
-  q15_t * pState);
-
+        arm_matrix_instance_q15 * pDst,
+        q15_t * pState);
 
   /**
    * @brief Q31 matrix multiplication
@@ -1561,11 +1716,10 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_mult_q31(
+arm_status arm_mat_mult_q31(
   const arm_matrix_instance_q31 * pSrcA,
   const arm_matrix_instance_q31 * pSrcB,
-  arm_matrix_instance_q31 * pDst);
-
+        arm_matrix_instance_q31 * pDst);
 
   /**
    * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
@@ -1575,11 +1729,10 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_mult_fast_q31(
+arm_status arm_mat_mult_fast_q31(
   const arm_matrix_instance_q31 * pSrcA,
   const arm_matrix_instance_q31 * pSrcB,
-  arm_matrix_instance_q31 * pDst);
-
+        arm_matrix_instance_q31 * pDst);
 
   /**
    * @brief Floating-point matrix subtraction
@@ -1589,11 +1742,10 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_sub_f32(
+arm_status arm_mat_sub_f32(
   const arm_matrix_instance_f32 * pSrcA,
   const arm_matrix_instance_f32 * pSrcB,
-  arm_matrix_instance_f32 * pDst);
-
+        arm_matrix_instance_f32 * pDst);
 
   /**
    * @brief Q15 matrix subtraction
@@ -1603,11 +1755,10 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_sub_q15(
+arm_status arm_mat_sub_q15(
   const arm_matrix_instance_q15 * pSrcA,
   const arm_matrix_instance_q15 * pSrcB,
-  arm_matrix_instance_q15 * pDst);
-
+        arm_matrix_instance_q15 * pDst);
 
   /**
    * @brief Q31 matrix subtraction
@@ -1617,11 +1768,10 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_sub_q31(
+arm_status arm_mat_sub_q31(
   const arm_matrix_instance_q31 * pSrcA,
   const arm_matrix_instance_q31 * pSrcB,
-  arm_matrix_instance_q31 * pDst);
-
+        arm_matrix_instance_q31 * pDst);
 
   /**
    * @brief Floating-point matrix scaling.
@@ -1631,11 +1781,10 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_scale_f32(
+arm_status arm_mat_scale_f32(
   const arm_matrix_instance_f32 * pSrc,
-  float32_t scale,
-  arm_matrix_instance_f32 * pDst);
-
+        float32_t scale,
+        arm_matrix_instance_f32 * pDst);
 
   /**
    * @brief Q15 matrix scaling.
@@ -1646,12 +1795,11 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_scale_q15(
+arm_status arm_mat_scale_q15(
   const arm_matrix_instance_q15 * pSrc,
-  q15_t scaleFract,
-  int32_t shift,
-  arm_matrix_instance_q15 * pDst);
-
+        q15_t scaleFract,
+        int32_t shift,
+        arm_matrix_instance_q15 * pDst);
 
   /**
    * @brief Q31 matrix scaling.
@@ -1662,12 +1810,11 @@ extern "C"
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-  arm_status arm_mat_scale_q31(
+arm_status arm_mat_scale_q31(
   const arm_matrix_instance_q31 * pSrc,
-  q31_t scaleFract,
-  int32_t shift,
-  arm_matrix_instance_q31 * pDst);
-
+        q31_t scaleFract,
+        int32_t shift,
+        arm_matrix_instance_q31 * pDst);
 
   /**
    * @brief  Q31 matrix initialization.
@@ -1676,12 +1823,11 @@ extern "C"
    * @param[in]     nColumns  number of columns in the matrix.
    * @param[in]     pData     points to the matrix data array.
    */
-  void arm_mat_init_q31(
-  arm_matrix_instance_q31 * S,
-  uint16_t nRows,
-  uint16_t nColumns,
-  q31_t * pData);
-
+void arm_mat_init_q31(
+        arm_matrix_instance_q31 * S,
+        uint16_t nRows,
+        uint16_t nColumns,
+        q31_t * pData);
 
   /**
    * @brief  Q15 matrix initialization.
@@ -1690,12 +1836,11 @@ extern "C"
    * @param[in]     nColumns  number of columns in the matrix.
    * @param[in]     pData     points to the matrix data array.
    */
-  void arm_mat_init_q15(
-  arm_matrix_instance_q15 * S,
-  uint16_t nRows,
-  uint16_t nColumns,
-  q15_t * pData);
-
+void arm_mat_init_q15(
+        arm_matrix_instance_q15 * S,
+        uint16_t nRows,
+        uint16_t nColumns,
+        q15_t * pData);
 
   /**
    * @brief  Floating-point matrix initialization.
@@ -1704,12 +1849,11 @@ extern "C"
    * @param[in]     nColumns  number of columns in the matrix.
    * @param[in]     pData     points to the matrix data array.
    */
-  void arm_mat_init_f32(
-  arm_matrix_instance_f32 * S,
-  uint16_t nRows,
-  uint16_t nColumns,
-  float32_t * pData);
-
+void arm_mat_init_f32(
+        arm_matrix_instance_f32 * S,
+        uint16_t nRows,
+        uint16_t nColumns,
+        float32_t * pData);
 
 
   /**
@@ -1717,17 +1861,17 @@ extern "C"
    */
   typedef struct
   {
-    q15_t A0;           /**< The derived gain, A0 = Kp + Ki + Kd . */
+          q15_t A0;           /**< The derived gain, A0 = Kp + Ki + Kd . */
 #if !defined (ARM_MATH_DSP)
-    q15_t A1;
-    q15_t A2;
+          q15_t A1;
+          q15_t A2;
 #else
-    q31_t A1;           /**< The derived gain A1 = -Kp - 2Kd | Kd.*/
+          q31_t A1;           /**< The derived gain A1 = -Kp - 2Kd | Kd.*/
 #endif
-    q15_t state[3];     /**< The state array of length 3. */
-    q15_t Kp;           /**< The proportional gain. */
-    q15_t Ki;           /**< The integral gain. */
-    q15_t Kd;           /**< The derivative gain. */
+          q15_t state[3];     /**< The state array of length 3. */
+          q15_t Kp;           /**< The proportional gain. */
+          q15_t Ki;           /**< The integral gain. */
+          q15_t Kd;           /**< The derivative gain. */
   } arm_pid_instance_q15;
 
   /**
@@ -1735,13 +1879,13 @@ extern "C"
    */
   typedef struct
   {
-    q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd . */
-    q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
-    q31_t A2;            /**< The derived gain, A2 = Kd . */
-    q31_t state[3];      /**< The state array of length 3. */
-    q31_t Kp;            /**< The proportional gain. */
-    q31_t Ki;            /**< The integral gain. */
-    q31_t Kd;            /**< The derivative gain. */
+          q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd . */
+          q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
+          q31_t A2;            /**< The derived gain, A2 = Kd . */
+          q31_t state[3];      /**< The state array of length 3. */
+          q31_t Kp;            /**< The proportional gain. */
+          q31_t Ki;            /**< The integral gain. */
+          q31_t Kd;            /**< The derivative gain. */
   } arm_pid_instance_q31;
 
   /**
@@ -1749,13 +1893,13 @@ extern "C"
    */
   typedef struct
   {
-    float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd . */
-    float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
-    float32_t A2;          /**< The derived gain, A2 = Kd . */
-    float32_t state[3];    /**< The state array of length 3. */
-    float32_t Kp;          /**< The proportional gain. */
-    float32_t Ki;          /**< The integral gain. */
-    float32_t Kd;          /**< The derivative gain. */
+          float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd . */
+          float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
+          float32_t A2;          /**< The derived gain, A2 = Kd . */
+          float32_t state[3];    /**< The state array of length 3. */
+          float32_t Kp;          /**< The proportional gain. */
+          float32_t Ki;          /**< The integral gain. */
+          float32_t Kd;          /**< The derivative gain. */
   } arm_pid_instance_f32;
 
 
@@ -1766,8 +1910,8 @@ extern "C"
    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
    */
   void arm_pid_init_f32(
-  arm_pid_instance_f32 * S,
-  int32_t resetStateFlag);
+        arm_pid_instance_f32 * S,
+        int32_t resetStateFlag);
 
 
   /**
@@ -1775,7 +1919,7 @@ extern "C"
    * @param[in,out] S  is an instance of the floating-point PID Control structure
    */
   void arm_pid_reset_f32(
-  arm_pid_instance_f32 * S);
+        arm_pid_instance_f32 * S);
 
 
   /**
@@ -1784,8 +1928,8 @@ extern "C"
    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
    */
   void arm_pid_init_q31(
-  arm_pid_instance_q31 * S,
-  int32_t resetStateFlag);
+        arm_pid_instance_q31 * S,
+        int32_t resetStateFlag);
 
 
   /**
@@ -1794,7 +1938,7 @@ extern "C"
    */
 
   void arm_pid_reset_q31(
-  arm_pid_instance_q31 * S);
+        arm_pid_instance_q31 * S);
 
 
   /**
@@ -1803,8 +1947,8 @@ extern "C"
    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
    */
   void arm_pid_init_q15(
-  arm_pid_instance_q15 * S,
-  int32_t resetStateFlag);
+        arm_pid_instance_q15 * S,
+        int32_t resetStateFlag);
 
 
   /**
@@ -1812,7 +1956,7 @@ extern "C"
    * @param[in,out] S  points to an instance of the q15 PID Control structure
    */
   void arm_pid_reset_q15(
-  arm_pid_instance_q15 * S);
+        arm_pid_instance_q15 * S);
 
 
   /**
@@ -1820,10 +1964,10 @@ extern "C"
    */
   typedef struct
   {
-    uint32_t nValues;           /**< nValues */
-    float32_t x1;               /**< x1 */
-    float32_t xSpacing;         /**< xSpacing */
-    float32_t *pYData;          /**< pointer to the table of Y values */
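+          uint32_t nValues;           /**< number of values in the table of Y values */
+          float32_t x1;               /**< x value corresponding to the first table entry */
+          float32_t xSpacing;         /**< spacing between consecutive x values */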
+          float32_t *pYData;          /**< pointer to the table of Y values */
   } arm_linear_interp_instance_f32;
 
   /**
@@ -1831,9 +1975,9 @@ extern "C"
    */
   typedef struct
   {
-    uint16_t numRows;   /**< number of rows in the data table. */
-    uint16_t numCols;   /**< number of columns in the data table. */
-    float32_t *pData;   /**< points to the data table. */
+          uint16_t numRows;   /**< number of rows in the data table. */
+          uint16_t numCols;   /**< number of columns in the data table. */
+          float32_t *pData;   /**< points to the data table. */
   } arm_bilinear_interp_instance_f32;
 
    /**
@@ -1841,9 +1985,9 @@ extern "C"
    */
   typedef struct
   {
-    uint16_t numRows;   /**< number of rows in the data table. */
-    uint16_t numCols;   /**< number of columns in the data table. */
-    q31_t *pData;       /**< points to the data table. */
+          uint16_t numRows;   /**< number of rows in the data table. */
+          uint16_t numCols;   /**< number of columns in the data table. */
+          q31_t *pData;       /**< points to the data table. */
   } arm_bilinear_interp_instance_q31;
 
    /**
@@ -1851,9 +1995,9 @@ extern "C"
    */
   typedef struct
   {
-    uint16_t numRows;   /**< number of rows in the data table. */
-    uint16_t numCols;   /**< number of columns in the data table. */
-    q15_t *pData;       /**< points to the data table. */
+          uint16_t numRows;   /**< number of rows in the data table. */
+          uint16_t numCols;   /**< number of columns in the data table. */
+          q15_t *pData;       /**< points to the data table. */
   } arm_bilinear_interp_instance_q15;
 
    /**
@@ -1861,9 +2005,9 @@ extern "C"
    */
   typedef struct
   {
-    uint16_t numRows;   /**< number of rows in the data table. */
-    uint16_t numCols;   /**< number of columns in the data table. */
-    q7_t *pData;        /**< points to the data table. */
+          uint16_t numRows;   /**< number of rows in the data table. */
+          uint16_t numCols;   /**< number of columns in the data table. */
+          q7_t *pData;        /**< points to the data table. */
   } arm_bilinear_interp_instance_q7;
 
 
@@ -1875,10 +2019,10 @@ extern "C"
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_mult_q7(
-  q7_t * pSrcA,
-  q7_t * pSrcB,
-  q7_t * pDst,
-  uint32_t blockSize);
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -1889,10 +2033,10 @@ extern "C"
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_mult_q15(
-  q15_t * pSrcA,
-  q15_t * pSrcB,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -1903,10 +2047,10 @@ extern "C"
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_mult_q31(
-  q31_t * pSrcA,
-  q31_t * pSrcB,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -1917,10 +2061,10 @@ extern "C"
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_mult_f32(
-  float32_t * pSrcA,
-  float32_t * pSrcB,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -1928,26 +2072,26 @@ extern "C"
    */
   typedef struct
   {
-    uint16_t fftLen;                 /**< length of the FFT. */
-    uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    q15_t *pTwiddle;                 /**< points to the Sin twiddle factor table. */
-    uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
-    uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          uint16_t fftLen;                 /**< length of the FFT. */
+          uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
+    const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
   } arm_cfft_radix2_instance_q15;
 
 /* Deprecated */
   arm_status arm_cfft_radix2_init_q15(
-  arm_cfft_radix2_instance_q15 * S,
-  uint16_t fftLen,
-  uint8_t ifftFlag,
-  uint8_t bitReverseFlag);
+        arm_cfft_radix2_instance_q15 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
 
 /* Deprecated */
   void arm_cfft_radix2_q15(
   const arm_cfft_radix2_instance_q15 * S,
-  q15_t * pSrc);
+        q15_t * pSrc);
 
 
   /**
@@ -1955,292 +2099,309 @@ extern "C"
    */
   typedef struct
   {
-    uint16_t fftLen;                 /**< length of the FFT. */
-    uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
-    uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
-    uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          uint16_t fftLen;                 /**< length of the FFT. */
+          uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
+    const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
   } arm_cfft_radix4_instance_q15;
 
 /* Deprecated */
   arm_status arm_cfft_radix4_init_q15(
-  arm_cfft_radix4_instance_q15 * S,
-  uint16_t fftLen,
-  uint8_t ifftFlag,
-  uint8_t bitReverseFlag);
+        arm_cfft_radix4_instance_q15 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
 
 /* Deprecated */
   void arm_cfft_radix4_q15(
   const arm_cfft_radix4_instance_q15 * S,
-  q15_t * pSrc);
+        q15_t * pSrc);
 
   /**
    * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
    */
   typedef struct
   {
-    uint16_t fftLen;                 /**< length of the FFT. */
-    uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    q31_t *pTwiddle;                 /**< points to the Twiddle factor table. */
-    uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
-    uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          uint16_t fftLen;                 /**< length of the FFT. */
+          uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const q31_t *pTwiddle;                 /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
   } arm_cfft_radix2_instance_q31;
 
 /* Deprecated */
   arm_status arm_cfft_radix2_init_q31(
-  arm_cfft_radix2_instance_q31 * S,
-  uint16_t fftLen,
-  uint8_t ifftFlag,
-  uint8_t bitReverseFlag);
+        arm_cfft_radix2_instance_q31 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
 
 /* Deprecated */
   void arm_cfft_radix2_q31(
   const arm_cfft_radix2_instance_q31 * S,
-  q31_t * pSrc);
+        q31_t * pSrc);
 
   /**
    * @brief Instance structure for the Q31 CFFT/CIFFT function.
    */
   typedef struct
   {
-    uint16_t fftLen;                 /**< length of the FFT. */
-    uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
-    uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
-    uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          uint16_t fftLen;                 /**< length of the FFT. */
+          uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
+    const uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
   } arm_cfft_radix4_instance_q31;
 
 /* Deprecated */
   void arm_cfft_radix4_q31(
   const arm_cfft_radix4_instance_q31 * S,
-  q31_t * pSrc);
+        q31_t * pSrc);
 
 /* Deprecated */
   arm_status arm_cfft_radix4_init_q31(
-  arm_cfft_radix4_instance_q31 * S,
-  uint16_t fftLen,
-  uint8_t ifftFlag,
-  uint8_t bitReverseFlag);
+        arm_cfft_radix4_instance_q31 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
 
   /**
    * @brief Instance structure for the floating-point CFFT/CIFFT function.
    */
   typedef struct
   {
-    uint16_t fftLen;                   /**< length of the FFT. */
-    uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
-    uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
-    uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-    float32_t onebyfftLen;             /**< value of 1/fftLen. */
+          uint16_t fftLen;                   /**< length of the FFT. */
+          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          float32_t onebyfftLen;             /**< value of 1/fftLen. */
   } arm_cfft_radix2_instance_f32;
 
 /* Deprecated */
   arm_status arm_cfft_radix2_init_f32(
-  arm_cfft_radix2_instance_f32 * S,
-  uint16_t fftLen,
-  uint8_t ifftFlag,
-  uint8_t bitReverseFlag);
+        arm_cfft_radix2_instance_f32 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
 
 /* Deprecated */
   void arm_cfft_radix2_f32(
   const arm_cfft_radix2_instance_f32 * S,
-  float32_t * pSrc);
+        float32_t * pSrc);
 
   /**
    * @brief Instance structure for the floating-point CFFT/CIFFT function.
    */
   typedef struct
   {
-    uint16_t fftLen;                   /**< length of the FFT. */
-    uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
-    uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
-    uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
-    uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-    float32_t onebyfftLen;             /**< value of 1/fftLen. */
+          uint16_t fftLen;                   /**< length of the FFT. */
+          uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+          uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    const float32_t *pTwiddle;               /**< points to the Twiddle factor table. */
+    const uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
+          uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+          uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+          float32_t onebyfftLen;             /**< value of 1/fftLen. */
   } arm_cfft_radix4_instance_f32;
 
 /* Deprecated */
   arm_status arm_cfft_radix4_init_f32(
-  arm_cfft_radix4_instance_f32 * S,
-  uint16_t fftLen,
-  uint8_t ifftFlag,
-  uint8_t bitReverseFlag);
+        arm_cfft_radix4_instance_f32 * S,
+        uint16_t fftLen,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
 
 /* Deprecated */
   void arm_cfft_radix4_f32(
   const arm_cfft_radix4_instance_f32 * S,
-  float32_t * pSrc);
+        float32_t * pSrc);
 
   /**
    * @brief Instance structure for the fixed-point CFFT/CIFFT function.
    */
   typedef struct
   {
-    uint16_t fftLen;                   /**< length of the FFT. */
+          uint16_t fftLen;                   /**< length of the FFT. */
     const q15_t *pTwiddle;             /**< points to the Twiddle factor table. */
     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
-    uint16_t bitRevLength;             /**< bit reversal table length. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
   } arm_cfft_instance_q15;
 
 void arm_cfft_q15(
     const arm_cfft_instance_q15 * S,
-    q15_t * p1,
-    uint8_t ifftFlag,
-    uint8_t bitReverseFlag);
+          q15_t * p1,
+          uint8_t ifftFlag,
+          uint8_t bitReverseFlag);
 
   /**
    * @brief Instance structure for the fixed-point CFFT/CIFFT function.
    */
   typedef struct
   {
-    uint16_t fftLen;                   /**< length of the FFT. */
+          uint16_t fftLen;                   /**< length of the FFT. */
     const q31_t *pTwiddle;             /**< points to the Twiddle factor table. */
     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
-    uint16_t bitRevLength;             /**< bit reversal table length. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
   } arm_cfft_instance_q31;
 
 void arm_cfft_q31(
     const arm_cfft_instance_q31 * S,
-    q31_t * p1,
-    uint8_t ifftFlag,
-    uint8_t bitReverseFlag);
+          q31_t * p1,
+          uint8_t ifftFlag,
+          uint8_t bitReverseFlag);
 
   /**
    * @brief Instance structure for the floating-point CFFT/CIFFT function.
    */
   typedef struct
   {
-    uint16_t fftLen;                   /**< length of the FFT. */
+          uint16_t fftLen;                   /**< length of the FFT. */
     const float32_t *pTwiddle;         /**< points to the Twiddle factor table. */
     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
-    uint16_t bitRevLength;             /**< bit reversal table length. */
+          uint16_t bitRevLength;             /**< bit reversal table length. */
   } arm_cfft_instance_f32;
 
   void arm_cfft_f32(
   const arm_cfft_instance_f32 * S,
-  float32_t * p1,
-  uint8_t ifftFlag,
-  uint8_t bitReverseFlag);
+        float32_t * p1,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag);
 
   /**
    * @brief Instance structure for the Q15 RFFT/RIFFT function.
    */
   typedef struct
   {
-    uint32_t fftLenReal;                      /**< length of the real FFT. */
-    uint8_t ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
-    uint8_t bitReverseFlagR;                  /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
-    uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    q15_t *pTwiddleAReal;                     /**< points to the real twiddle factor table. */
-    q15_t *pTwiddleBReal;                     /**< points to the imag twiddle factor table. */
+          uint32_t fftLenReal;                      /**< length of the real FFT. */
+          uint8_t ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
+          uint8_t bitReverseFlagR;                  /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
+          uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    const q15_t *pTwiddleAReal;                     /**< points to the real twiddle factor table. */
+    const q15_t *pTwiddleBReal;                     /**< points to the imag twiddle factor table. */
     const arm_cfft_instance_q15 *pCfft;       /**< points to the complex FFT instance. */
   } arm_rfft_instance_q15;
 
   arm_status arm_rfft_init_q15(
-  arm_rfft_instance_q15 * S,
-  uint32_t fftLenReal,
-  uint32_t ifftFlagR,
-  uint32_t bitReverseFlag);
+        arm_rfft_instance_q15 * S,
+        uint32_t fftLenReal,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
 
   void arm_rfft_q15(
   const arm_rfft_instance_q15 * S,
-  q15_t * pSrc,
-  q15_t * pDst);
+        q15_t * pSrc,
+        q15_t * pDst);
 
   /**
    * @brief Instance structure for the Q31 RFFT/RIFFT function.
    */
   typedef struct
   {
-    uint32_t fftLenReal;                        /**< length of the real FFT. */
-    uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
-    uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
-    uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    q31_t *pTwiddleAReal;                       /**< points to the real twiddle factor table. */
-    q31_t *pTwiddleBReal;                       /**< points to the imag twiddle factor table. */
+          uint32_t fftLenReal;                        /**< length of the real FFT. */
+          uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
+          uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
+          uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    const q31_t *pTwiddleAReal;                       /**< points to the real twiddle factor table. */
+    const q31_t *pTwiddleBReal;                       /**< points to the imag twiddle factor table. */
     const arm_cfft_instance_q31 *pCfft;         /**< points to the complex FFT instance. */
   } arm_rfft_instance_q31;
 
   arm_status arm_rfft_init_q31(
-  arm_rfft_instance_q31 * S,
-  uint32_t fftLenReal,
-  uint32_t ifftFlagR,
-  uint32_t bitReverseFlag);
+        arm_rfft_instance_q31 * S,
+        uint32_t fftLenReal,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
 
   void arm_rfft_q31(
   const arm_rfft_instance_q31 * S,
-  q31_t * pSrc,
-  q31_t * pDst);
+        q31_t * pSrc,
+        q31_t * pDst);
 
   /**
    * @brief Instance structure for the floating-point RFFT/RIFFT function.
    */
   typedef struct
   {
-    uint32_t fftLenReal;                        /**< length of the real FFT. */
-    uint16_t fftLenBy2;                         /**< length of the complex FFT. */
-    uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
-    uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
-    uint32_t twidCoefRModifier;                     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
-    float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
-    float32_t *pTwiddleBReal;                   /**< points to the imag twiddle factor table. */
-    arm_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance. */
+          uint32_t fftLenReal;                        /**< length of the real FFT. */
+          uint16_t fftLenBy2;                         /**< length of the complex FFT. */
+          uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
+          uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
+          uint32_t twidCoefRModifier;                     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    const float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
+    const float32_t *pTwiddleBReal;                   /**< points to the imag twiddle factor table. */
+          arm_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance. */
   } arm_rfft_instance_f32;
 
   arm_status arm_rfft_init_f32(
-  arm_rfft_instance_f32 * S,
-  arm_cfft_radix4_instance_f32 * S_CFFT,
-  uint32_t fftLenReal,
-  uint32_t ifftFlagR,
-  uint32_t bitReverseFlag);
+        arm_rfft_instance_f32 * S,
+        arm_cfft_radix4_instance_f32 * S_CFFT,
+        uint32_t fftLenReal,
+        uint32_t ifftFlagR,
+        uint32_t bitReverseFlag);
 
   void arm_rfft_f32(
   const arm_rfft_instance_f32 * S,
-  float32_t * pSrc,
-  float32_t * pDst);
+        float32_t * pSrc,
+        float32_t * pDst);
 
   /**
    * @brief Instance structure for the floating-point RFFT/RIFFT function.
    */
 typedef struct
   {
-    arm_cfft_instance_f32 Sint;      /**< Internal CFFT structure. */
-    uint16_t fftLenRFFT;             /**< length of the real sequence */
-    float32_t * pTwiddleRFFT;        /**< Twiddle factors real stage  */
+          arm_cfft_instance_f32 Sint;      /**< Internal CFFT structure. */
+          uint16_t fftLenRFFT;             /**< length of the real sequence */
+    const float32_t * pTwiddleRFFT;        /**< Twiddle factors for the real stage. */
   } arm_rfft_fast_instance_f32 ;
 
 arm_status arm_rfft_fast_init_f32 (
-   arm_rfft_fast_instance_f32 * S,
-   uint16_t fftLen);
+         arm_rfft_fast_instance_f32 * S,
+         uint16_t fftLen);
+
+arm_status arm_rfft_32_fast_init_f32 ( arm_rfft_fast_instance_f32 * S );
+
+arm_status arm_rfft_64_fast_init_f32 ( arm_rfft_fast_instance_f32 * S );
+
+arm_status arm_rfft_128_fast_init_f32 ( arm_rfft_fast_instance_f32 * S );
+
+arm_status arm_rfft_256_fast_init_f32 ( arm_rfft_fast_instance_f32 * S );
+
+arm_status arm_rfft_512_fast_init_f32 ( arm_rfft_fast_instance_f32 * S );
+
+arm_status arm_rfft_1024_fast_init_f32 ( arm_rfft_fast_instance_f32 * S );
+
+arm_status arm_rfft_2048_fast_init_f32 ( arm_rfft_fast_instance_f32 * S );
+
+arm_status arm_rfft_4096_fast_init_f32 ( arm_rfft_fast_instance_f32 * S );
 
-void arm_rfft_fast_f32(
-  arm_rfft_fast_instance_f32 * S,
-  float32_t * p, float32_t * pOut,
-  uint8_t ifftFlag);
+
+  void arm_rfft_fast_f32(
+        arm_rfft_fast_instance_f32 * S,
+        float32_t * p, float32_t * pOut,
+        uint8_t ifftFlag);
 
   /**
    * @brief Instance structure for the floating-point DCT4/IDCT4 function.
    */
   typedef struct
   {
-    uint16_t N;                          /**< length of the DCT4. */
-    uint16_t Nby2;                       /**< half of the length of the DCT4. */
-    float32_t normalize;                 /**< normalizing factor. */
-    float32_t *pTwiddle;                 /**< points to the twiddle factor table. */
-    float32_t *pCosFactor;               /**< points to the cosFactor table. */
-    arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
-    arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
+          uint16_t N;                          /**< length of the DCT4. */
+          uint16_t Nby2;                       /**< half of the length of the DCT4. */
+          float32_t normalize;                 /**< normalizing factor. */
+    const float32_t *pTwiddle;                 /**< points to the twiddle factor table. */
+    const float32_t *pCosFactor;               /**< points to the cosFactor table. */
+          arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
+          arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
   } arm_dct4_instance_f32;
 
 
@@ -2255,12 +2416,12 @@ void arm_rfft_fast_f32(
    * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported transform length.
    */
   arm_status arm_dct4_init_f32(
-  arm_dct4_instance_f32 * S,
-  arm_rfft_instance_f32 * S_RFFT,
-  arm_cfft_radix4_instance_f32 * S_CFFT,
-  uint16_t N,
-  uint16_t Nby2,
-  float32_t normalize);
+        arm_dct4_instance_f32 * S,
+        arm_rfft_instance_f32 * S_RFFT,
+        arm_cfft_radix4_instance_f32 * S_CFFT,
+        uint16_t N,
+        uint16_t Nby2,
+        float32_t normalize);
 
 
   /**
@@ -2271,8 +2432,8 @@ void arm_rfft_fast_f32(
    */
   void arm_dct4_f32(
   const arm_dct4_instance_f32 * S,
-  float32_t * pState,
-  float32_t * pInlineBuffer);
+        float32_t * pState,
+        float32_t * pInlineBuffer);
 
 
   /**
@@ -2280,13 +2441,13 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t N;                          /**< length of the DCT4. */
-    uint16_t Nby2;                       /**< half of the length of the DCT4. */
-    q31_t normalize;                     /**< normalizing factor. */
-    q31_t *pTwiddle;                     /**< points to the twiddle factor table. */
-    q31_t *pCosFactor;                   /**< points to the cosFactor table. */
-    arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
-    arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
+          uint16_t N;                          /**< length of the DCT4. */
+          uint16_t Nby2;                       /**< half of the length of the DCT4. */
+          q31_t normalize;                     /**< normalizing factor. */
+    const q31_t *pTwiddle;                     /**< points to the twiddle factor table. */
+    const q31_t *pCosFactor;                   /**< points to the cosFactor table. */
+          arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
+          arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
   } arm_dct4_instance_q31;
 
 
@@ -2301,12 +2462,12 @@ void arm_rfft_fast_f32(
    * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
    */
   arm_status arm_dct4_init_q31(
-  arm_dct4_instance_q31 * S,
-  arm_rfft_instance_q31 * S_RFFT,
-  arm_cfft_radix4_instance_q31 * S_CFFT,
-  uint16_t N,
-  uint16_t Nby2,
-  q31_t normalize);
+        arm_dct4_instance_q31 * S,
+        arm_rfft_instance_q31 * S_RFFT,
+        arm_cfft_radix4_instance_q31 * S_CFFT,
+        uint16_t N,
+        uint16_t Nby2,
+        q31_t normalize);
 
 
   /**
@@ -2317,8 +2478,8 @@ void arm_rfft_fast_f32(
    */
   void arm_dct4_q31(
   const arm_dct4_instance_q31 * S,
-  q31_t * pState,
-  q31_t * pInlineBuffer);
+        q31_t * pState,
+        q31_t * pInlineBuffer);
 
 
   /**
@@ -2326,13 +2487,13 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t N;                          /**< length of the DCT4. */
-    uint16_t Nby2;                       /**< half of the length of the DCT4. */
-    q15_t normalize;                     /**< normalizing factor. */
-    q15_t *pTwiddle;                     /**< points to the twiddle factor table. */
-    q15_t *pCosFactor;                   /**< points to the cosFactor table. */
-    arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
-    arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
+          uint16_t N;                          /**< length of the DCT4. */
+          uint16_t Nby2;                       /**< half of the length of the DCT4. */
+          q15_t normalize;                     /**< normalizing factor. */
+    const q15_t *pTwiddle;                     /**< points to the twiddle factor table. */
+    const q15_t *pCosFactor;                   /**< points to the cosFactor table. */
+          arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
+          arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
   } arm_dct4_instance_q15;
 
 
@@ -2347,12 +2508,12 @@ void arm_rfft_fast_f32(
    * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
    */
   arm_status arm_dct4_init_q15(
-  arm_dct4_instance_q15 * S,
-  arm_rfft_instance_q15 * S_RFFT,
-  arm_cfft_radix4_instance_q15 * S_CFFT,
-  uint16_t N,
-  uint16_t Nby2,
-  q15_t normalize);
+        arm_dct4_instance_q15 * S,
+        arm_rfft_instance_q15 * S_RFFT,
+        arm_cfft_radix4_instance_q15 * S_CFFT,
+        uint16_t N,
+        uint16_t Nby2,
+        q15_t normalize);
 
 
   /**
@@ -2363,8 +2524,8 @@ void arm_rfft_fast_f32(
    */
   void arm_dct4_q15(
   const arm_dct4_instance_q15 * S,
-  q15_t * pState,
-  q15_t * pInlineBuffer);
+        q15_t * pState,
+        q15_t * pInlineBuffer);
 
 
   /**
@@ -2375,10 +2536,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_add_f32(
-  float32_t * pSrcA,
-  float32_t * pSrcB,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2389,10 +2550,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_add_q7(
-  q7_t * pSrcA,
-  q7_t * pSrcB,
-  q7_t * pDst,
-  uint32_t blockSize);
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2403,10 +2564,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_add_q15(
-  q15_t * pSrcA,
-  q15_t * pSrcB,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2417,10 +2578,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_add_q31(
-  q31_t * pSrcA,
-  q31_t * pSrcB,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2431,10 +2592,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_sub_f32(
-  float32_t * pSrcA,
-  float32_t * pSrcB,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2445,10 +2606,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_sub_q7(
-  q7_t * pSrcA,
-  q7_t * pSrcB,
-  q7_t * pDst,
-  uint32_t blockSize);
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2459,10 +2620,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_sub_q15(
-  q15_t * pSrcA,
-  q15_t * pSrcB,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2473,10 +2634,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_sub_q31(
-  q31_t * pSrcA,
-  q31_t * pSrcB,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2487,10 +2648,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in the vector
    */
   void arm_scale_f32(
-  float32_t * pSrc,
-  float32_t scale,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        float32_t scale,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2502,11 +2663,11 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize   number of samples in the vector
    */
   void arm_scale_q7(
-  q7_t * pSrc,
-  q7_t scaleFract,
-  int8_t shift,
-  q7_t * pDst,
-  uint32_t blockSize);
+  const q7_t * pSrc,
+        q7_t scaleFract,
+        int8_t shift,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2518,11 +2679,11 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize   number of samples in the vector
    */
   void arm_scale_q15(
-  q15_t * pSrc,
-  q15_t scaleFract,
-  int8_t shift,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        q15_t scaleFract,
+        int8_t shift,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2534,11 +2695,11 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize   number of samples in the vector
    */
   void arm_scale_q31(
-  q31_t * pSrc,
-  q31_t scaleFract,
-  int8_t shift,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrc,
+        q31_t scaleFract,
+        int8_t shift,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2548,9 +2709,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_abs_q7(
-  q7_t * pSrc,
-  q7_t * pDst,
-  uint32_t blockSize);
+  const q7_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2560,9 +2721,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_abs_f32(
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2572,9 +2733,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_abs_q15(
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2584,9 +2745,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in each vector
    */
   void arm_abs_q31(
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2597,10 +2758,10 @@ void arm_rfft_fast_f32(
    * @param[out] result     output result returned here
    */
   void arm_dot_prod_f32(
-  float32_t * pSrcA,
-  float32_t * pSrcB,
-  uint32_t blockSize,
-  float32_t * result);
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        uint32_t blockSize,
+        float32_t * result);
 
 
   /**
@@ -2611,10 +2772,10 @@ void arm_rfft_fast_f32(
    * @param[out] result     output result returned here
    */
   void arm_dot_prod_q7(
-  q7_t * pSrcA,
-  q7_t * pSrcB,
-  uint32_t blockSize,
-  q31_t * result);
+  const q7_t * pSrcA,
+  const q7_t * pSrcB,
+        uint32_t blockSize,
+        q31_t * result);
 
 
   /**
@@ -2625,10 +2786,10 @@ void arm_rfft_fast_f32(
    * @param[out] result     output result returned here
    */
   void arm_dot_prod_q15(
-  q15_t * pSrcA,
-  q15_t * pSrcB,
-  uint32_t blockSize,
-  q63_t * result);
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t blockSize,
+        q63_t * result);
 
 
   /**
@@ -2639,10 +2800,10 @@ void arm_rfft_fast_f32(
    * @param[out] result     output result returned here
    */
   void arm_dot_prod_q31(
-  q31_t * pSrcA,
-  q31_t * pSrcB,
-  uint32_t blockSize,
-  q63_t * result);
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t blockSize,
+        q63_t * result);
 
 
   /**
@@ -2653,10 +2814,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in the vector
    */
   void arm_shift_q7(
-  q7_t * pSrc,
-  int8_t shiftBits,
-  q7_t * pDst,
-  uint32_t blockSize);
+  const q7_t * pSrc,
+        int8_t shiftBits,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2667,10 +2828,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in the vector
    */
   void arm_shift_q15(
-  q15_t * pSrc,
-  int8_t shiftBits,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        int8_t shiftBits,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2681,10 +2842,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in the vector
    */
   void arm_shift_q31(
-  q31_t * pSrc,
-  int8_t shiftBits,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrc,
+        int8_t shiftBits,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2695,10 +2856,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in the vector
    */
   void arm_offset_f32(
-  float32_t * pSrc,
-  float32_t offset,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        float32_t offset,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2709,10 +2870,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in the vector
    */
   void arm_offset_q7(
-  q7_t * pSrc,
-  q7_t offset,
-  q7_t * pDst,
-  uint32_t blockSize);
+  const q7_t * pSrc,
+        q7_t offset,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2723,10 +2884,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in the vector
    */
   void arm_offset_q15(
-  q15_t * pSrc,
-  q15_t offset,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        q15_t offset,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2737,10 +2898,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in the vector
    */
   void arm_offset_q31(
-  q31_t * pSrc,
-  q31_t offset,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrc,
+        q31_t offset,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2750,9 +2911,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in the vector
    */
   void arm_negate_f32(
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2762,9 +2923,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in the vector
    */
   void arm_negate_q7(
-  q7_t * pSrc,
-  q7_t * pDst,
-  uint32_t blockSize);
+  const q7_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2774,9 +2935,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in the vector
    */
   void arm_negate_q15(
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2786,9 +2947,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples in the vector
    */
   void arm_negate_q31(
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2798,9 +2959,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples to process
    */
   void arm_copy_f32(
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2810,9 +2971,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples to process
    */
   void arm_copy_q7(
-  q7_t * pSrc,
-  q7_t * pDst,
-  uint32_t blockSize);
+  const q7_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2822,9 +2983,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples to process
    */
   void arm_copy_q15(
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2834,9 +2995,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples to process
    */
   void arm_copy_q31(
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2846,9 +3007,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples to process
    */
   void arm_fill_f32(
-  float32_t value,
-  float32_t * pDst,
-  uint32_t blockSize);
+        float32_t value,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2858,9 +3019,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples to process
    */
   void arm_fill_q7(
-  q7_t value,
-  q7_t * pDst,
-  uint32_t blockSize);
+        q7_t value,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2870,9 +3031,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples to process
    */
   void arm_fill_q15(
-  q15_t value,
-  q15_t * pDst,
-  uint32_t blockSize);
+        q15_t value,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -2882,9 +3043,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples to process
    */
   void arm_fill_q31(
-  q31_t value,
-  q31_t * pDst,
-  uint32_t blockSize);
+        q31_t value,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
 /**
@@ -2896,11 +3057,11 @@ void arm_rfft_fast_f32(
  * @param[out] pDst     points to the location where the output result is written.  Length srcALen+srcBLen-1.
  */
   void arm_conv_f32(
-  float32_t * pSrcA,
-  uint32_t srcALen,
-  float32_t * pSrcB,
-  uint32_t srcBLen,
-  float32_t * pDst);
+  const float32_t * pSrcA,
+        uint32_t srcALen,
+  const float32_t * pSrcB,
+        uint32_t srcBLen,
+        float32_t * pDst);
 
 
   /**
@@ -2914,13 +3075,13 @@ void arm_rfft_fast_f32(
    * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
    */
   void arm_conv_opt_q15(
-  q15_t * pSrcA,
-  uint32_t srcALen,
-  q15_t * pSrcB,
-  uint32_t srcBLen,
-  q15_t * pDst,
-  q15_t * pScratch1,
-  q15_t * pScratch2);
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
 
 
 /**
@@ -2932,11 +3093,11 @@ void arm_rfft_fast_f32(
  * @param[out] pDst     points to the location where the output result is written.  Length srcALen+srcBLen-1.
  */
   void arm_conv_q15(
-  q15_t * pSrcA,
-  uint32_t srcALen,
-  q15_t * pSrcB,
-  uint32_t srcBLen,
-  q15_t * pDst);
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst);
 
 
   /**
@@ -2948,11 +3109,11 @@ void arm_rfft_fast_f32(
    * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
    */
   void arm_conv_fast_q15(
-          q15_t * pSrcA,
-          uint32_t srcALen,
-          q15_t * pSrcB,
-          uint32_t srcBLen,
-          q15_t * pDst);
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst);
 
 
   /**
@@ -2966,13 +3127,13 @@ void arm_rfft_fast_f32(
    * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
    */
   void arm_conv_fast_opt_q15(
-  q15_t * pSrcA,
-  uint32_t srcALen,
-  q15_t * pSrcB,
-  uint32_t srcBLen,
-  q15_t * pDst,
-  q15_t * pScratch1,
-  q15_t * pScratch2);
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
 
 
   /**
@@ -2984,11 +3145,11 @@ void arm_rfft_fast_f32(
    * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
    */
   void arm_conv_q31(
-  q31_t * pSrcA,
-  uint32_t srcALen,
-  q31_t * pSrcB,
-  uint32_t srcBLen,
-  q31_t * pDst);
+  const q31_t * pSrcA,
+        uint32_t srcALen,
+  const q31_t * pSrcB,
+        uint32_t srcBLen,
+        q31_t * pDst);
 
 
   /**
@@ -3000,11 +3161,11 @@ void arm_rfft_fast_f32(
    * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
    */
   void arm_conv_fast_q31(
-  q31_t * pSrcA,
-  uint32_t srcALen,
-  q31_t * pSrcB,
-  uint32_t srcBLen,
-  q31_t * pDst);
+  const q31_t * pSrcA,
+        uint32_t srcALen,
+  const q31_t * pSrcB,
+        uint32_t srcBLen,
+        q31_t * pDst);
 
 
     /**
@@ -3018,13 +3179,13 @@ void arm_rfft_fast_f32(
    * @param[in]  pScratch2  points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
    */
   void arm_conv_opt_q7(
-  q7_t * pSrcA,
-  uint32_t srcALen,
-  q7_t * pSrcB,
-  uint32_t srcBLen,
-  q7_t * pDst,
-  q15_t * pScratch1,
-  q15_t * pScratch2);
+  const q7_t * pSrcA,
+        uint32_t srcALen,
+  const q7_t * pSrcB,
+        uint32_t srcBLen,
+        q7_t * pDst,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
 
 
   /**
@@ -3036,11 +3197,11 @@ void arm_rfft_fast_f32(
    * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
    */
   void arm_conv_q7(
-  q7_t * pSrcA,
-  uint32_t srcALen,
-  q7_t * pSrcB,
-  uint32_t srcBLen,
-  q7_t * pDst);
+  const q7_t * pSrcA,
+        uint32_t srcALen,
+  const q7_t * pSrcB,
+        uint32_t srcBLen,
+        q7_t * pDst);
 
 
   /**
@@ -3055,13 +3216,13 @@ void arm_rfft_fast_f32(
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
   arm_status arm_conv_partial_f32(
-  float32_t * pSrcA,
-  uint32_t srcALen,
-  float32_t * pSrcB,
-  uint32_t srcBLen,
-  float32_t * pDst,
-  uint32_t firstIndex,
-  uint32_t numPoints);
+  const float32_t * pSrcA,
+        uint32_t srcALen,
+  const float32_t * pSrcB,
+        uint32_t srcBLen,
+        float32_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints);
 
 
   /**
@@ -3078,15 +3239,15 @@ void arm_rfft_fast_f32(
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
   arm_status arm_conv_partial_opt_q15(
-  q15_t * pSrcA,
-  uint32_t srcALen,
-  q15_t * pSrcB,
-  uint32_t srcBLen,
-  q15_t * pDst,
-  uint32_t firstIndex,
-  uint32_t numPoints,
-  q15_t * pScratch1,
-  q15_t * pScratch2);
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
 
 
   /**
@@ -3101,13 +3262,13 @@ void arm_rfft_fast_f32(
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
   arm_status arm_conv_partial_q15(
-  q15_t * pSrcA,
-  uint32_t srcALen,
-  q15_t * pSrcB,
-  uint32_t srcBLen,
-  q15_t * pDst,
-  uint32_t firstIndex,
-  uint32_t numPoints);
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints);
 
 
   /**
@@ -3122,13 +3283,13 @@ void arm_rfft_fast_f32(
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
   arm_status arm_conv_partial_fast_q15(
-  q15_t * pSrcA,
-  uint32_t srcALen,
-  q15_t * pSrcB,
-  uint32_t srcBLen,
-  q15_t * pDst,
-  uint32_t firstIndex,
-  uint32_t numPoints);
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints);
 
 
   /**
@@ -3145,15 +3306,15 @@ void arm_rfft_fast_f32(
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
   arm_status arm_conv_partial_fast_opt_q15(
-  q15_t * pSrcA,
-  uint32_t srcALen,
-  q15_t * pSrcB,
-  uint32_t srcBLen,
-  q15_t * pDst,
-  uint32_t firstIndex,
-  uint32_t numPoints,
-  q15_t * pScratch1,
-  q15_t * pScratch2);
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
 
 
   /**
@@ -3168,13 +3329,13 @@ void arm_rfft_fast_f32(
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
   arm_status arm_conv_partial_q31(
-  q31_t * pSrcA,
-  uint32_t srcALen,
-  q31_t * pSrcB,
-  uint32_t srcBLen,
-  q31_t * pDst,
-  uint32_t firstIndex,
-  uint32_t numPoints);
+  const q31_t * pSrcA,
+        uint32_t srcALen,
+  const q31_t * pSrcB,
+        uint32_t srcBLen,
+        q31_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints);
 
 
   /**
@@ -3189,13 +3350,13 @@ void arm_rfft_fast_f32(
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
   arm_status arm_conv_partial_fast_q31(
-  q31_t * pSrcA,
-  uint32_t srcALen,
-  q31_t * pSrcB,
-  uint32_t srcBLen,
-  q31_t * pDst,
-  uint32_t firstIndex,
-  uint32_t numPoints);
+  const q31_t * pSrcA,
+        uint32_t srcALen,
+  const q31_t * pSrcB,
+        uint32_t srcBLen,
+        q31_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints);
 
 
   /**
@@ -3212,15 +3373,15 @@ void arm_rfft_fast_f32(
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
   arm_status arm_conv_partial_opt_q7(
-  q7_t * pSrcA,
-  uint32_t srcALen,
-  q7_t * pSrcB,
-  uint32_t srcBLen,
-  q7_t * pDst,
-  uint32_t firstIndex,
-  uint32_t numPoints,
-  q15_t * pScratch1,
-  q15_t * pScratch2);
+  const q7_t * pSrcA,
+        uint32_t srcALen,
+  const q7_t * pSrcB,
+        uint32_t srcBLen,
+        q7_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
 
 
 /**
@@ -3235,13 +3396,13 @@ void arm_rfft_fast_f32(
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
   arm_status arm_conv_partial_q7(
-  q7_t * pSrcA,
-  uint32_t srcALen,
-  q7_t * pSrcB,
-  uint32_t srcBLen,
-  q7_t * pDst,
-  uint32_t firstIndex,
-  uint32_t numPoints);
+  const q7_t * pSrcA,
+        uint32_t srcALen,
+  const q7_t * pSrcB,
+        uint32_t srcBLen,
+        q7_t * pDst,
+        uint32_t firstIndex,
+        uint32_t numPoints);
 
 
   /**
@@ -3249,10 +3410,10 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint8_t M;                  /**< decimation factor. */
-    uint16_t numTaps;           /**< number of coefficients in the filter. */
-    q15_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps.*/
-    q15_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          uint8_t M;                  /**< decimation factor. */
+          uint16_t numTaps;           /**< number of coefficients in the filter. */
+    const q15_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps.*/
+          q15_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
   } arm_fir_decimate_instance_q15;
 
   /**
@@ -3260,56 +3421,57 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint8_t M;                  /**< decimation factor. */
-    uint16_t numTaps;           /**< number of coefficients in the filter. */
-    q31_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps.*/
-    q31_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          uint8_t M;                  /**< decimation factor. */
+          uint16_t numTaps;           /**< number of coefficients in the filter. */
+    const q31_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps.*/
+          q31_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
   } arm_fir_decimate_instance_q31;
 
-  /**
-   * @brief Instance structure for the floating-point FIR decimator.
-   */
-  typedef struct
+/**
+  @brief Instance structure for floating-point FIR decimator.
+ */
+typedef struct
   {
-    uint8_t M;                  /**< decimation factor. */
-    uint16_t numTaps;           /**< number of coefficients in the filter. */
-    float32_t *pCoeffs;         /**< points to the coefficient array. The array is of length numTaps.*/
-    float32_t *pState;          /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          uint8_t M;                  /**< decimation factor. */
+          uint16_t numTaps;           /**< number of coefficients in the filter. */
+    const float32_t *pCoeffs;         /**< points to the coefficient array. The array is of length numTaps.*/
+          float32_t *pState;          /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
   } arm_fir_decimate_instance_f32;
 
 
-  /**
-   * @brief Processing function for the floating-point FIR decimator.
-   * @param[in]  S          points to an instance of the floating-point FIR decimator structure.
-   * @param[in]  pSrc       points to the block of input data.
-   * @param[out] pDst       points to the block of output data
-   * @param[in]  blockSize  number of input samples to process per call.
-   */
-  void arm_fir_decimate_f32(
+/**
+  @brief         Processing function for floating-point FIR decimator.
+  @param[in]     S         points to an instance of the floating-point FIR decimator structure
+  @param[in]     pSrc      points to the block of input data
+  @param[out]    pDst      points to the block of output data
+  @param[in]     blockSize number of samples to process
+ */
+void arm_fir_decimate_f32(
   const arm_fir_decimate_instance_f32 * S,
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
-  /**
-   * @brief  Initialization function for the floating-point FIR decimator.
-   * @param[in,out] S          points to an instance of the floating-point FIR decimator structure.
-   * @param[in]     numTaps    number of coefficients in the filter.
-   * @param[in]     M          decimation factor.
-   * @param[in]     pCoeffs    points to the filter coefficients.
-   * @param[in]     pState     points to the state buffer.
-   * @param[in]     blockSize  number of input samples to process per call.
-   * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
-   * <code>blockSize</code> is not a multiple of <code>M</code>.
-   */
-  arm_status arm_fir_decimate_init_f32(
-  arm_fir_decimate_instance_f32 * S,
-  uint16_t numTaps,
-  uint8_t M,
-  float32_t * pCoeffs,
-  float32_t * pState,
-  uint32_t blockSize);
+/**
+  @brief         Initialization function for the floating-point FIR decimator.
+  @param[in,out] S          points to an instance of the floating-point FIR decimator structure
+  @param[in]     numTaps    number of coefficients in the filter
+  @param[in]     M          decimation factor
+  @param[in]     pCoeffs    points to the filter coefficients
+  @param[in]     pState     points to the state buffer
+  @param[in]     blockSize  number of input samples to process per call
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS      : Operation successful
+                   - \ref ARM_MATH_LENGTH_ERROR : <code>blockSize</code> is not a multiple of <code>M</code>
+ */
+arm_status arm_fir_decimate_init_f32(
+        arm_fir_decimate_instance_f32 * S,
+        uint16_t numTaps,
+        uint8_t M,
+  const float32_t * pCoeffs,
+        float32_t * pState,
+        uint32_t blockSize);
 
 
   /**
@@ -3321,9 +3483,9 @@ void arm_rfft_fast_f32(
    */
   void arm_fir_decimate_q15(
   const arm_fir_decimate_instance_q15 * S,
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -3335,9 +3497,9 @@ void arm_rfft_fast_f32(
    */
   void arm_fir_decimate_fast_q15(
   const arm_fir_decimate_instance_q15 * S,
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -3352,12 +3514,12 @@ void arm_rfft_fast_f32(
    * <code>blockSize</code> is not a multiple of <code>M</code>.
    */
   arm_status arm_fir_decimate_init_q15(
-  arm_fir_decimate_instance_q15 * S,
-  uint16_t numTaps,
-  uint8_t M,
-  q15_t * pCoeffs,
-  q15_t * pState,
-  uint32_t blockSize);
+        arm_fir_decimate_instance_q15 * S,
+        uint16_t numTaps,
+        uint8_t M,
+  const q15_t * pCoeffs,
+        q15_t * pState,
+        uint32_t blockSize);
 
 
   /**
@@ -3369,9 +3531,9 @@ void arm_rfft_fast_f32(
    */
   void arm_fir_decimate_q31(
   const arm_fir_decimate_instance_q31 * S,
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
   /**
    * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
@@ -3381,10 +3543,10 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of input samples to process per call.
    */
   void arm_fir_decimate_fast_q31(
-  arm_fir_decimate_instance_q31 * S,
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const arm_fir_decimate_instance_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -3399,12 +3561,12 @@ void arm_rfft_fast_f32(
    * <code>blockSize</code> is not a multiple of <code>M</code>.
    */
   arm_status arm_fir_decimate_init_q31(
-  arm_fir_decimate_instance_q31 * S,
-  uint16_t numTaps,
-  uint8_t M,
-  q31_t * pCoeffs,
-  q31_t * pState,
-  uint32_t blockSize);
+        arm_fir_decimate_instance_q31 * S,
+        uint16_t numTaps,
+        uint8_t M,
+  const q31_t * pCoeffs,
+        q31_t * pState,
+        uint32_t blockSize);
 
 
   /**
@@ -3412,10 +3574,10 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint8_t L;                      /**< upsample factor. */
-    uint16_t phaseLength;           /**< length of each polyphase filter component. */
-    q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
-    q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
+        uint8_t L;                      /**< upsample factor. */
+        uint16_t phaseLength;           /**< length of each polyphase filter component. */
+  const q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
+        q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
   } arm_fir_interpolate_instance_q15;
 
   /**
@@ -3423,10 +3585,10 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint8_t L;                      /**< upsample factor. */
-    uint16_t phaseLength;           /**< length of each polyphase filter component. */
-    q31_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
-    q31_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
+        uint8_t L;                      /**< upsample factor. */
+        uint16_t phaseLength;           /**< length of each polyphase filter component. */
+  const q31_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
+        q31_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
   } arm_fir_interpolate_instance_q31;
 
   /**
@@ -3434,10 +3596,10 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint8_t L;                     /**< upsample factor. */
-    uint16_t phaseLength;          /**< length of each polyphase filter component. */
-    float32_t *pCoeffs;            /**< points to the coefficient array. The array is of length L*phaseLength. */
-    float32_t *pState;             /**< points to the state variable array. The array is of length phaseLength+numTaps-1. */
+        uint8_t L;                     /**< upsample factor. */
+        uint16_t phaseLength;          /**< length of each polyphase filter component. */
+  const float32_t *pCoeffs;            /**< points to the coefficient array. The array is of length L*phaseLength. */
+        float32_t *pState;             /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
   } arm_fir_interpolate_instance_f32;
 
 
@@ -3450,9 +3612,9 @@ void arm_rfft_fast_f32(
    */
   void arm_fir_interpolate_q15(
   const arm_fir_interpolate_instance_q15 * S,
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -3467,12 +3629,12 @@ void arm_rfft_fast_f32(
    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
    */
   arm_status arm_fir_interpolate_init_q15(
-  arm_fir_interpolate_instance_q15 * S,
-  uint8_t L,
-  uint16_t numTaps,
-  q15_t * pCoeffs,
-  q15_t * pState,
-  uint32_t blockSize);
+        arm_fir_interpolate_instance_q15 * S,
+        uint8_t L,
+        uint16_t numTaps,
+  const q15_t * pCoeffs,
+        q15_t * pState,
+        uint32_t blockSize);
 
 
   /**
@@ -3484,9 +3646,9 @@ void arm_rfft_fast_f32(
    */
   void arm_fir_interpolate_q31(
   const arm_fir_interpolate_instance_q31 * S,
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -3501,12 +3663,12 @@ void arm_rfft_fast_f32(
    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
    */
   arm_status arm_fir_interpolate_init_q31(
-  arm_fir_interpolate_instance_q31 * S,
-  uint8_t L,
-  uint16_t numTaps,
-  q31_t * pCoeffs,
-  q31_t * pState,
-  uint32_t blockSize);
+        arm_fir_interpolate_instance_q31 * S,
+        uint8_t L,
+        uint16_t numTaps,
+  const q31_t * pCoeffs,
+        q31_t * pState,
+        uint32_t blockSize);
 
 
   /**
@@ -3518,9 +3680,9 @@ void arm_rfft_fast_f32(
    */
   void arm_fir_interpolate_f32(
   const arm_fir_interpolate_instance_f32 * S,
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -3535,12 +3697,12 @@ void arm_rfft_fast_f32(
    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
    */
   arm_status arm_fir_interpolate_init_f32(
-  arm_fir_interpolate_instance_f32 * S,
-  uint8_t L,
-  uint16_t numTaps,
-  float32_t * pCoeffs,
-  float32_t * pState,
-  uint32_t blockSize);
+        arm_fir_interpolate_instance_f32 * S,
+        uint8_t L,
+        uint16_t numTaps,
+  const float32_t * pCoeffs,
+        float32_t * pState,
+        uint32_t blockSize);
 
 
   /**
@@ -3548,10 +3710,10 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    q63_t *pState;           /**< points to the array of state coefficients.  The array is of length 4*numStages. */
-    q31_t *pCoeffs;          /**< points to the array of coefficients.  The array is of length 5*numStages. */
-    uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
+          uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          q63_t *pState;           /**< points to the array of state coefficients.  The array is of length 4*numStages. */
+    const q31_t *pCoeffs;          /**< points to the array of coefficients.  The array is of length 5*numStages. */
+          uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
   } arm_biquad_cas_df1_32x64_ins_q31;
 
 
@@ -3563,9 +3725,9 @@ void arm_rfft_fast_f32(
    */
   void arm_biquad_cas_df1_32x64_q31(
   const arm_biquad_cas_df1_32x64_ins_q31 * S,
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
+        q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -3576,11 +3738,11 @@ void arm_rfft_fast_f32(
    * @param[in]     postShift  shift to be applied to the output. Varies according to the coefficients format
    */
   void arm_biquad_cas_df1_32x64_init_q31(
-  arm_biquad_cas_df1_32x64_ins_q31 * S,
-  uint8_t numStages,
-  q31_t * pCoeffs,
-  q63_t * pState,
-  uint8_t postShift);
+        arm_biquad_cas_df1_32x64_ins_q31 * S,
+        uint8_t numStages,
+  const q31_t * pCoeffs,
+        q63_t * pState,
+        uint8_t postShift);
 
 
   /**
@@ -3588,9 +3750,9 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    float32_t *pState;         /**< points to the array of state coefficients.  The array is of length 2*numStages. */
-    float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
+          uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float32_t *pState;         /**< points to the array of state coefficients.  The array is of length 2*numStages. */
+    const float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
   } arm_biquad_cascade_df2T_instance_f32;
 
   /**
@@ -3598,9 +3760,9 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    float32_t *pState;         /**< points to the array of state coefficients.  The array is of length 4*numStages. */
-    float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
+          uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float32_t *pState;         /**< points to the array of state coefficients.  The array is of length 4*numStages. */
+    const float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
   } arm_biquad_cascade_stereo_df2T_instance_f32;
 
   /**
@@ -3608,9 +3770,9 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    float64_t *pState;         /**< points to the array of state coefficients.  The array is of length 2*numStages. */
-    float64_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
+          uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+          float64_t *pState;         /**< points to the array of state coefficients.  The array is of length 2*numStages. */
+          float64_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
   } arm_biquad_cascade_df2T_instance_f64;
 
 
@@ -3623,9 +3785,9 @@ void arm_rfft_fast_f32(
    */
   void arm_biquad_cascade_df2T_f32(
   const arm_biquad_cascade_df2T_instance_f32 * S,
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -3637,9 +3799,9 @@ void arm_rfft_fast_f32(
    */
   void arm_biquad_cascade_stereo_df2T_f32(
   const arm_biquad_cascade_stereo_df2T_instance_f32 * S,
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -3651,11 +3813,17 @@ void arm_rfft_fast_f32(
    */
   void arm_biquad_cascade_df2T_f64(
   const arm_biquad_cascade_df2T_instance_f64 * S,
-  float64_t * pSrc,
-  float64_t * pDst,
-  uint32_t blockSize);
+        float64_t * pSrc,
+        float64_t * pDst,
+        uint32_t blockSize);
 
 
+#if defined(ARM_MATH_NEON) 
+void arm_biquad_cascade_df2T_compute_coefs_f32(
+  arm_biquad_cascade_df2T_instance_f32 * S,
+  uint8_t numStages,
+  float32_t * pCoeffs);
+#endif
   /**
    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
    * @param[in,out] S          points to an instance of the filter data structure.
@@ -3664,10 +3832,10 @@ void arm_rfft_fast_f32(
    * @param[in]     pState     points to the state buffer.
    */
   void arm_biquad_cascade_df2T_init_f32(
-  arm_biquad_cascade_df2T_instance_f32 * S,
-  uint8_t numStages,
-  float32_t * pCoeffs,
-  float32_t * pState);
+        arm_biquad_cascade_df2T_instance_f32 * S,
+        uint8_t numStages,
+  const float32_t * pCoeffs,
+        float32_t * pState);
 
 
   /**
@@ -3678,10 +3846,10 @@ void arm_rfft_fast_f32(
    * @param[in]     pState     points to the state buffer.
    */
   void arm_biquad_cascade_stereo_df2T_init_f32(
-  arm_biquad_cascade_stereo_df2T_instance_f32 * S,
-  uint8_t numStages,
-  float32_t * pCoeffs,
-  float32_t * pState);
+        arm_biquad_cascade_stereo_df2T_instance_f32 * S,
+        uint8_t numStages,
+  const float32_t * pCoeffs,
+        float32_t * pState);
 
 
   /**
@@ -3692,10 +3860,10 @@ void arm_rfft_fast_f32(
    * @param[in]     pState     points to the state buffer.
    */
   void arm_biquad_cascade_df2T_init_f64(
-  arm_biquad_cascade_df2T_instance_f64 * S,
-  uint8_t numStages,
-  float64_t * pCoeffs,
-  float64_t * pState);
+        arm_biquad_cascade_df2T_instance_f64 * S,
+        uint8_t numStages,
+        float64_t * pCoeffs,
+        float64_t * pState);
 
 
   /**
@@ -3703,9 +3871,9 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numStages;                  /**< number of filter stages. */
-    q15_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
-    q15_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
+          uint16_t numStages;                  /**< number of filter stages. */
+          q15_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
+    const q15_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
   } arm_fir_lattice_instance_q15;
 
   /**
@@ -3713,9 +3881,9 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numStages;                  /**< number of filter stages. */
-    q31_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
-    q31_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
+          uint16_t numStages;                  /**< number of filter stages. */
+          q31_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
+    const q31_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
   } arm_fir_lattice_instance_q31;
 
   /**
@@ -3723,9 +3891,9 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numStages;                  /**< number of filter stages. */
-    float32_t *pState;                   /**< points to the state variable array. The array is of length numStages. */
-    float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
+          uint16_t numStages;                  /**< number of filter stages. */
+          float32_t *pState;                   /**< points to the state variable array. The array is of length numStages. */
+    const float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
   } arm_fir_lattice_instance_f32;
 
 
@@ -3737,10 +3905,10 @@ void arm_rfft_fast_f32(
    * @param[in] pState     points to the state buffer.  The array is of length numStages.
    */
   void arm_fir_lattice_init_q15(
-  arm_fir_lattice_instance_q15 * S,
-  uint16_t numStages,
-  q15_t * pCoeffs,
-  q15_t * pState);
+        arm_fir_lattice_instance_q15 * S,
+        uint16_t numStages,
+  const q15_t * pCoeffs,
+        q15_t * pState);
 
 
   /**
@@ -3752,9 +3920,9 @@ void arm_rfft_fast_f32(
    */
   void arm_fir_lattice_q15(
   const arm_fir_lattice_instance_q15 * S,
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -3765,10 +3933,10 @@ void arm_rfft_fast_f32(
    * @param[in] pState     points to the state buffer.   The array is of length numStages.
    */
   void arm_fir_lattice_init_q31(
-  arm_fir_lattice_instance_q31 * S,
-  uint16_t numStages,
-  q31_t * pCoeffs,
-  q31_t * pState);
+        arm_fir_lattice_instance_q31 * S,
+        uint16_t numStages,
+  const q31_t * pCoeffs,
+        q31_t * pState);
 
 
   /**
@@ -3780,9 +3948,9 @@ void arm_rfft_fast_f32(
    */
   void arm_fir_lattice_q31(
   const arm_fir_lattice_instance_q31 * S,
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
 /**
@@ -3793,10 +3961,10 @@ void arm_rfft_fast_f32(
  * @param[in] pState     points to the state buffer.  The array is of length numStages.
  */
   void arm_fir_lattice_init_f32(
-  arm_fir_lattice_instance_f32 * S,
-  uint16_t numStages,
-  float32_t * pCoeffs,
-  float32_t * pState);
+        arm_fir_lattice_instance_f32 * S,
+        uint16_t numStages,
+  const float32_t * pCoeffs,
+        float32_t * pState);
 
 
   /**
@@ -3808,9 +3976,9 @@ void arm_rfft_fast_f32(
    */
   void arm_fir_lattice_f32(
   const arm_fir_lattice_instance_f32 * S,
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -3818,10 +3986,10 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numStages;                  /**< number of stages in the filter. */
-    q15_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
-    q15_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
-    q15_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
+          uint16_t numStages;                  /**< number of stages in the filter. */
+          q15_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
+          q15_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
+          q15_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
   } arm_iir_lattice_instance_q15;
 
   /**
@@ -3829,10 +3997,10 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numStages;                  /**< number of stages in the filter. */
-    q31_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
-    q31_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
-    q31_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
+          uint16_t numStages;                  /**< number of stages in the filter. */
+          q31_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
+          q31_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
+          q31_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
   } arm_iir_lattice_instance_q31;
 
   /**
@@ -3840,10 +4008,10 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numStages;                  /**< number of stages in the filter. */
-    float32_t *pState;                   /**< points to the state variable array. The array is of length numStages+blockSize. */
-    float32_t *pkCoeffs;                 /**< points to the reflection coefficient array. The array is of length numStages. */
-    float32_t *pvCoeffs;                 /**< points to the ladder coefficient array. The array is of length numStages+1. */
+          uint16_t numStages;                  /**< number of stages in the filter. */
+          float32_t *pState;                   /**< points to the state variable array. The array is of length numStages+blockSize. */
+          float32_t *pkCoeffs;                 /**< points to the reflection coefficient array. The array is of length numStages. */
+          float32_t *pvCoeffs;                 /**< points to the ladder coefficient array. The array is of length numStages+1. */
   } arm_iir_lattice_instance_f32;
 
 
@@ -3856,9 +4024,9 @@ void arm_rfft_fast_f32(
    */
   void arm_iir_lattice_f32(
   const arm_iir_lattice_instance_f32 * S,
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -3871,12 +4039,12 @@ void arm_rfft_fast_f32(
    * @param[in] blockSize  number of samples to process.
    */
   void arm_iir_lattice_init_f32(
-  arm_iir_lattice_instance_f32 * S,
-  uint16_t numStages,
-  float32_t * pkCoeffs,
-  float32_t * pvCoeffs,
-  float32_t * pState,
-  uint32_t blockSize);
+        arm_iir_lattice_instance_f32 * S,
+        uint16_t numStages,
+        float32_t * pkCoeffs,
+        float32_t * pvCoeffs,
+        float32_t * pState,
+        uint32_t blockSize);
 
 
   /**
@@ -3888,9 +4056,9 @@ void arm_rfft_fast_f32(
    */
   void arm_iir_lattice_q31(
   const arm_iir_lattice_instance_q31 * S,
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -3903,12 +4071,12 @@ void arm_rfft_fast_f32(
    * @param[in] blockSize  number of samples to process.
    */
   void arm_iir_lattice_init_q31(
-  arm_iir_lattice_instance_q31 * S,
-  uint16_t numStages,
-  q31_t * pkCoeffs,
-  q31_t * pvCoeffs,
-  q31_t * pState,
-  uint32_t blockSize);
+        arm_iir_lattice_instance_q31 * S,
+        uint16_t numStages,
+        q31_t * pkCoeffs,
+        q31_t * pvCoeffs,
+        q31_t * pState,
+        uint32_t blockSize);
 
 
   /**
@@ -3920,9 +4088,9 @@ void arm_rfft_fast_f32(
    */
   void arm_iir_lattice_q15(
   const arm_iir_lattice_instance_q15 * S,
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
 /**
@@ -3935,12 +4103,12 @@ void arm_rfft_fast_f32(
  * @param[in] blockSize  number of samples to process per call.
  */
   void arm_iir_lattice_init_q15(
-  arm_iir_lattice_instance_q15 * S,
-  uint16_t numStages,
-  q15_t * pkCoeffs,
-  q15_t * pvCoeffs,
-  q15_t * pState,
-  uint32_t blockSize);
+        arm_iir_lattice_instance_q15 * S,
+        uint16_t numStages,
+        q15_t * pkCoeffs,
+        q15_t * pvCoeffs,
+        q15_t * pState,
+        uint32_t blockSize);
 
 
   /**
@@ -3948,10 +4116,10 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numTaps;    /**< number of coefficients in the filter. */
-    float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
-    float32_t mu;        /**< step size that controls filter coefficient updates. */
+          uint16_t numTaps;    /**< number of coefficients in the filter. */
+          float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
+          float32_t mu;        /**< step size that controls filter coefficient updates. */
   } arm_lms_instance_f32;
 
 
@@ -3966,11 +4134,11 @@ void arm_rfft_fast_f32(
    */
   void arm_lms_f32(
   const arm_lms_instance_f32 * S,
-  float32_t * pSrc,
-  float32_t * pRef,
-  float32_t * pOut,
-  float32_t * pErr,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        float32_t * pRef,
+        float32_t * pOut,
+        float32_t * pErr,
+        uint32_t blockSize);
 
 
   /**
@@ -3983,12 +4151,12 @@ void arm_rfft_fast_f32(
    * @param[in] blockSize  number of samples to process.
    */
   void arm_lms_init_f32(
-  arm_lms_instance_f32 * S,
-  uint16_t numTaps,
-  float32_t * pCoeffs,
-  float32_t * pState,
-  float32_t mu,
-  uint32_t blockSize);
+        arm_lms_instance_f32 * S,
+        uint16_t numTaps,
+        float32_t * pCoeffs,
+        float32_t * pState,
+        float32_t mu,
+        uint32_t blockSize);
 
 
   /**
@@ -3996,11 +4164,11 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numTaps;    /**< number of coefficients in the filter. */
-    q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
-    q15_t mu;            /**< step size that controls filter coefficient updates. */
-    uint32_t postShift;  /**< bit shift applied to coefficients. */
+          uint16_t numTaps;    /**< number of coefficients in the filter. */
+          q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
+          q15_t mu;            /**< step size that controls filter coefficient updates. */
+          uint32_t postShift;  /**< bit shift applied to coefficients. */
   } arm_lms_instance_q15;
 
 
@@ -4015,13 +4183,13 @@ void arm_rfft_fast_f32(
    * @param[in] postShift  bit shift applied to coefficients.
    */
   void arm_lms_init_q15(
-  arm_lms_instance_q15 * S,
-  uint16_t numTaps,
-  q15_t * pCoeffs,
-  q15_t * pState,
-  q15_t mu,
-  uint32_t blockSize,
-  uint32_t postShift);
+        arm_lms_instance_q15 * S,
+        uint16_t numTaps,
+        q15_t * pCoeffs,
+        q15_t * pState,
+        q15_t mu,
+        uint32_t blockSize,
+        uint32_t postShift);
 
 
   /**
@@ -4035,11 +4203,11 @@ void arm_rfft_fast_f32(
    */
   void arm_lms_q15(
   const arm_lms_instance_q15 * S,
-  q15_t * pSrc,
-  q15_t * pRef,
-  q15_t * pOut,
-  q15_t * pErr,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        q15_t * pRef,
+        q15_t * pOut,
+        q15_t * pErr,
+        uint32_t blockSize);
 
 
   /**
@@ -4047,11 +4215,11 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numTaps;    /**< number of coefficients in the filter. */
-    q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
-    q31_t mu;            /**< step size that controls filter coefficient updates. */
-    uint32_t postShift;  /**< bit shift applied to coefficients. */
+          uint16_t numTaps;    /**< number of coefficients in the filter. */
+          q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
+          q31_t mu;            /**< step size that controls filter coefficient updates. */
+          uint32_t postShift;  /**< bit shift applied to coefficients. */
   } arm_lms_instance_q31;
 
 
@@ -4066,11 +4234,11 @@ void arm_rfft_fast_f32(
    */
   void arm_lms_q31(
   const arm_lms_instance_q31 * S,
-  q31_t * pSrc,
-  q31_t * pRef,
-  q31_t * pOut,
-  q31_t * pErr,
-  uint32_t blockSize);
+  const q31_t * pSrc,
+        q31_t * pRef,
+        q31_t * pOut,
+        q31_t * pErr,
+        uint32_t blockSize);
 
 
   /**
@@ -4084,13 +4252,13 @@ void arm_rfft_fast_f32(
    * @param[in] postShift  bit shift applied to coefficients.
    */
   void arm_lms_init_q31(
-  arm_lms_instance_q31 * S,
-  uint16_t numTaps,
-  q31_t * pCoeffs,
-  q31_t * pState,
-  q31_t mu,
-  uint32_t blockSize,
-  uint32_t postShift);
+        arm_lms_instance_q31 * S,
+        uint16_t numTaps,
+        q31_t * pCoeffs,
+        q31_t * pState,
+        q31_t mu,
+        uint32_t blockSize,
+        uint32_t postShift);
 
 
   /**
@@ -4098,12 +4266,12 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numTaps;     /**< number of coefficients in the filter. */
-    float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
-    float32_t mu;         /**< step size that control filter coefficient updates. */
-    float32_t energy;     /**< saves previous frame energy. */
-    float32_t x0;         /**< saves previous input sample. */
+          uint16_t numTaps;     /**< number of coefficients in the filter. */
+          float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
+          float32_t mu;         /**< step size that controls filter coefficient updates. */
+          float32_t energy;     /**< saves previous frame energy. */
+          float32_t x0;         /**< saves previous input sample. */
   } arm_lms_norm_instance_f32;
 
 
@@ -4117,12 +4285,12 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples to process.
    */
   void arm_lms_norm_f32(
-  arm_lms_norm_instance_f32 * S,
-  float32_t * pSrc,
-  float32_t * pRef,
-  float32_t * pOut,
-  float32_t * pErr,
-  uint32_t blockSize);
+        arm_lms_norm_instance_f32 * S,
+  const float32_t * pSrc,
+        float32_t * pRef,
+        float32_t * pOut,
+        float32_t * pErr,
+        uint32_t blockSize);
 
 
   /**
@@ -4135,12 +4303,12 @@ void arm_rfft_fast_f32(
    * @param[in] blockSize  number of samples to process.
    */
   void arm_lms_norm_init_f32(
-  arm_lms_norm_instance_f32 * S,
-  uint16_t numTaps,
-  float32_t * pCoeffs,
-  float32_t * pState,
-  float32_t mu,
-  uint32_t blockSize);
+        arm_lms_norm_instance_f32 * S,
+        uint16_t numTaps,
+        float32_t * pCoeffs,
+        float32_t * pState,
+        float32_t mu,
+        uint32_t blockSize);
 
 
   /**
@@ -4148,14 +4316,14 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numTaps;     /**< number of coefficients in the filter. */
-    q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
-    q31_t mu;             /**< step size that controls filter coefficient updates. */
-    uint8_t postShift;    /**< bit shift applied to coefficients. */
-    q31_t *recipTable;    /**< points to the reciprocal initial value table. */
-    q31_t energy;         /**< saves previous frame energy. */
-    q31_t x0;             /**< saves previous input sample. */
+          uint16_t numTaps;     /**< number of coefficients in the filter. */
+          q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
+          q31_t mu;             /**< step size that controls filter coefficient updates. */
+          uint8_t postShift;    /**< bit shift applied to coefficients. */
+    const q31_t *recipTable;    /**< points to the reciprocal initial value table. */
+          q31_t energy;         /**< saves previous frame energy. */
+          q31_t x0;             /**< saves previous input sample. */
   } arm_lms_norm_instance_q31;
 
 
@@ -4169,12 +4337,12 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples to process.
    */
   void arm_lms_norm_q31(
-  arm_lms_norm_instance_q31 * S,
-  q31_t * pSrc,
-  q31_t * pRef,
-  q31_t * pOut,
-  q31_t * pErr,
-  uint32_t blockSize);
+        arm_lms_norm_instance_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pRef,
+        q31_t * pOut,
+        q31_t * pErr,
+        uint32_t blockSize);
 
 
   /**
@@ -4188,13 +4356,13 @@ void arm_rfft_fast_f32(
    * @param[in] postShift  bit shift applied to coefficients.
    */
   void arm_lms_norm_init_q31(
-  arm_lms_norm_instance_q31 * S,
-  uint16_t numTaps,
-  q31_t * pCoeffs,
-  q31_t * pState,
-  q31_t mu,
-  uint32_t blockSize,
-  uint8_t postShift);
+        arm_lms_norm_instance_q31 * S,
+        uint16_t numTaps,
+        q31_t * pCoeffs,
+        q31_t * pState,
+        q31_t mu,
+        uint32_t blockSize,
+        uint8_t postShift);
 
 
   /**
@@ -4202,14 +4370,14 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numTaps;     /**< Number of coefficients in the filter. */
-    q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-    q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
-    q15_t mu;             /**< step size that controls filter coefficient updates. */
-    uint8_t postShift;    /**< bit shift applied to coefficients. */
-    q15_t *recipTable;    /**< Points to the reciprocal initial value table. */
-    q15_t energy;         /**< saves previous frame energy. */
-    q15_t x0;             /**< saves previous input sample. */
+          uint16_t numTaps;     /**< Number of coefficients in the filter. */
+          q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+          q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
+          q15_t mu;             /**< step size that controls filter coefficient updates. */
+          uint8_t postShift;    /**< bit shift applied to coefficients. */
+    const q15_t *recipTable;    /**< Points to the reciprocal initial value table. */
+          q15_t energy;         /**< saves previous frame energy. */
+          q15_t x0;             /**< saves previous input sample. */
   } arm_lms_norm_instance_q15;
 
 
@@ -4223,12 +4391,12 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  number of samples to process.
    */
   void arm_lms_norm_q15(
-  arm_lms_norm_instance_q15 * S,
-  q15_t * pSrc,
-  q15_t * pRef,
-  q15_t * pOut,
-  q15_t * pErr,
-  uint32_t blockSize);
+        arm_lms_norm_instance_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pRef,
+        q15_t * pOut,
+        q15_t * pErr,
+        uint32_t blockSize);
 
 
   /**
@@ -4242,13 +4410,13 @@ void arm_rfft_fast_f32(
    * @param[in] postShift  bit shift applied to coefficients.
    */
   void arm_lms_norm_init_q15(
-  arm_lms_norm_instance_q15 * S,
-  uint16_t numTaps,
-  q15_t * pCoeffs,
-  q15_t * pState,
-  q15_t mu,
-  uint32_t blockSize,
-  uint8_t postShift);
+        arm_lms_norm_instance_q15 * S,
+        uint16_t numTaps,
+        q15_t * pCoeffs,
+        q15_t * pState,
+        q15_t mu,
+        uint32_t blockSize,
+        uint8_t postShift);
 
 
   /**
@@ -4260,81 +4428,80 @@ void arm_rfft_fast_f32(
    * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
    */
   void arm_correlate_f32(
-  float32_t * pSrcA,
-  uint32_t srcALen,
-  float32_t * pSrcB,
-  uint32_t srcBLen,
-  float32_t * pDst);
+  const float32_t * pSrcA,
+        uint32_t srcALen,
+  const float32_t * pSrcB,
+        uint32_t srcBLen,
+        float32_t * pDst);
 
 
-   /**
-   * @brief Correlation of Q15 sequences
-   * @param[in]  pSrcA     points to the first input sequence.
-   * @param[in]  srcALen   length of the first input sequence.
-   * @param[in]  pSrcB     points to the second input sequence.
-   * @param[in]  srcBLen   length of the second input sequence.
-   * @param[out] pDst      points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   * @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
-   */
-  void arm_correlate_opt_q15(
-  q15_t * pSrcA,
-  uint32_t srcALen,
-  q15_t * pSrcB,
-  uint32_t srcBLen,
-  q15_t * pDst,
-  q15_t * pScratch);
-
+/**
+ @brief Correlation of Q15 sequences
+ @param[in]  pSrcA     points to the first input sequence
+ @param[in]  srcALen   length of the first input sequence
+ @param[in]  pSrcB     points to the second input sequence
+ @param[in]  srcBLen   length of the second input sequence
+ @param[out] pDst      points to the block of output data. Length is 2 * max(srcALen, srcBLen) - 1.
+ @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
+*/
+void arm_correlate_opt_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        q15_t * pScratch);
 
-  /**
-   * @brief Correlation of Q15 sequences.
-   * @param[in]  pSrcA    points to the first input sequence.
-   * @param[in]  srcALen  length of the first input sequence.
-   * @param[in]  pSrcB    points to the second input sequence.
-   * @param[in]  srcBLen  length of the second input sequence.
-   * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   */
 
+/**
+  @brief Correlation of Q15 sequences.
+  @param[in]  pSrcA    points to the first input sequence
+  @param[in]  srcALen  length of the first input sequence
+  @param[in]  pSrcB    points to the second input sequence
+  @param[in]  srcBLen  length of the second input sequence
+  @param[out] pDst     points to the block of output data. Length is 2 * max(srcALen, srcBLen) - 1.
+ */
   void arm_correlate_q15(
-  q15_t * pSrcA,
-  uint32_t srcALen,
-  q15_t * pSrcB,
-  uint32_t srcBLen,
-  q15_t * pDst);
-
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst);
 
-  /**
-   * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
-   * @param[in]  pSrcA    points to the first input sequence.
-   * @param[in]  srcALen  length of the first input sequence.
-   * @param[in]  pSrcB    points to the second input sequence.
-   * @param[in]  srcBLen  length of the second input sequence.
-   * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   */
 
-  void arm_correlate_fast_q15(
-  q15_t * pSrcA,
-  uint32_t srcALen,
-  q15_t * pSrcB,
-  uint32_t srcBLen,
-  q15_t * pDst);
+/**
+  @brief         Correlation of Q15 sequences (fast version).
+  @param[in]     pSrcA      points to the first input sequence
+  @param[in]     srcALen    length of the first input sequence
+  @param[in]     pSrcB      points to the second input sequence
+  @param[in]     srcBLen    length of the second input sequence
+  @param[out]    pDst       points to the location where the output result is written.  Length 2 * max(srcALen, srcBLen) - 1.
+  @return        none
+ */
+void arm_correlate_fast_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst);
 
 
-  /**
-   * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
-   * @param[in]  pSrcA     points to the first input sequence.
-   * @param[in]  srcALen   length of the first input sequence.
-   * @param[in]  pSrcB     points to the second input sequence.
-   * @param[in]  srcBLen   length of the second input sequence.
-   * @param[out] pDst      points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   * @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
-   */
-  void arm_correlate_fast_opt_q15(
-  q15_t * pSrcA,
-  uint32_t srcALen,
-  q15_t * pSrcB,
-  uint32_t srcBLen,
-  q15_t * pDst,
-  q15_t * pScratch);
+/**
+  @brief Correlation of Q15 sequences (fast version).
+  @param[in]  pSrcA     points to the first input sequence.
+  @param[in]  srcALen   length of the first input sequence.
+  @param[in]  pSrcB     points to the second input sequence.
+  @param[in]  srcBLen   length of the second input sequence.
+  @param[out] pDst      points to the block of output data. Length is 2 * max(srcALen, srcBLen) - 1.
+  @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
+ */
+void arm_correlate_fast_opt_q15(
+  const q15_t * pSrcA,
+        uint32_t srcALen,
+  const q15_t * pSrcB,
+        uint32_t srcBLen,
+        q15_t * pDst,
+        q15_t * pScratch);
 
 
   /**
@@ -4346,27 +4513,27 @@ void arm_rfft_fast_f32(
    * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
    */
   void arm_correlate_q31(
-  q31_t * pSrcA,
-  uint32_t srcALen,
-  q31_t * pSrcB,
-  uint32_t srcBLen,
-  q31_t * pDst);
+  const q31_t * pSrcA,
+        uint32_t srcALen,
+  const q31_t * pSrcB,
+        uint32_t srcBLen,
+        q31_t * pDst);
 
 
-  /**
-   * @brief Correlation of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
-   * @param[in]  pSrcA    points to the first input sequence.
-   * @param[in]  srcALen  length of the first input sequence.
-   * @param[in]  pSrcB    points to the second input sequence.
-   * @param[in]  srcBLen  length of the second input sequence.
-   * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   */
-  void arm_correlate_fast_q31(
-  q31_t * pSrcA,
-  uint32_t srcALen,
-  q31_t * pSrcB,
-  uint32_t srcBLen,
-  q31_t * pDst);
+/**
+  @brief Correlation of Q31 sequences (fast version).
+  @param[in]  pSrcA    points to the first input sequence
+  @param[in]  srcALen  length of the first input sequence
+  @param[in]  pSrcB    points to the second input sequence
+  @param[in]  srcBLen  length of the second input sequence
+  @param[out] pDst     points to the block of output data. Length is 2 * max(srcALen, srcBLen) - 1.
+ */
+void arm_correlate_fast_q31(
+  const q31_t * pSrcA,
+        uint32_t srcALen,
+  const q31_t * pSrcB,
+        uint32_t srcBLen,
+        q31_t * pDst);
 
 
  /**
@@ -4380,13 +4547,13 @@ void arm_rfft_fast_f32(
    * @param[in]  pScratch2  points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
    */
   void arm_correlate_opt_q7(
-  q7_t * pSrcA,
-  uint32_t srcALen,
-  q7_t * pSrcB,
-  uint32_t srcBLen,
-  q7_t * pDst,
-  q15_t * pScratch1,
-  q15_t * pScratch2);
+  const q7_t * pSrcA,
+        uint32_t srcALen,
+  const q7_t * pSrcB,
+        uint32_t srcBLen,
+        q7_t * pDst,
+        q15_t * pScratch1,
+        q15_t * pScratch2);
 
 
   /**
@@ -4398,11 +4565,11 @@ void arm_rfft_fast_f32(
    * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
    */
   void arm_correlate_q7(
-  q7_t * pSrcA,
-  uint32_t srcALen,
-  q7_t * pSrcB,
-  uint32_t srcBLen,
-  q7_t * pDst);
+  const q7_t * pSrcA,
+        uint32_t srcALen,
+  const q7_t * pSrcB,
+        uint32_t srcBLen,
+        q7_t * pDst);
 
 
   /**
@@ -4410,12 +4577,12 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numTaps;             /**< number of coefficients in the filter. */
-    uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
-    float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
-    float32_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
-    uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
-    int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
+          uint16_t numTaps;             /**< number of coefficients in the filter. */
+          uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
+          float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
+    const float32_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
+          uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
+          int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
   } arm_fir_sparse_instance_f32;
 
   /**
@@ -4423,12 +4590,12 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numTaps;             /**< number of coefficients in the filter. */
-    uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
-    q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
-    q31_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
-    uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
-    int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
+          uint16_t numTaps;             /**< number of coefficients in the filter. */
+          uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
+          q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
+    const q31_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
+          uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
+          int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
   } arm_fir_sparse_instance_q31;
 
   /**
@@ -4436,12 +4603,12 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numTaps;             /**< number of coefficients in the filter. */
-    uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
-    q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
-    q15_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
-    uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
-    int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
+          uint16_t numTaps;             /**< number of coefficients in the filter. */
+          uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
+          q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
+    const q15_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
+          uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
+          int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
   } arm_fir_sparse_instance_q15;
 
   /**
@@ -4449,12 +4616,12 @@ void arm_rfft_fast_f32(
    */
   typedef struct
   {
-    uint16_t numTaps;             /**< number of coefficients in the filter. */
-    uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
-    q7_t *pState;                 /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
-    q7_t *pCoeffs;                /**< points to the coefficient array. The array is of length numTaps.*/
-    uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
-    int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
+          uint16_t numTaps;             /**< number of coefficients in the filter. */
+          uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
+          q7_t *pState;                 /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
+    const q7_t *pCoeffs;                /**< points to the coefficient array. The array is of length numTaps.*/
+          uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
+          int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
   } arm_fir_sparse_instance_q7;
 
 
@@ -4467,11 +4634,11 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize   number of input samples to process per call.
    */
   void arm_fir_sparse_f32(
-  arm_fir_sparse_instance_f32 * S,
-  float32_t * pSrc,
-  float32_t * pDst,
-  float32_t * pScratchIn,
-  uint32_t blockSize);
+        arm_fir_sparse_instance_f32 * S,
+  const float32_t * pSrc,
+        float32_t * pDst,
+        float32_t * pScratchIn,
+        uint32_t blockSize);
 
 
   /**
@@ -4485,13 +4652,13 @@ void arm_rfft_fast_f32(
    * @param[in]     blockSize  number of samples that will be processed per block.
    */
   void arm_fir_sparse_init_f32(
-  arm_fir_sparse_instance_f32 * S,
-  uint16_t numTaps,
-  float32_t * pCoeffs,
-  float32_t * pState,
-  int32_t * pTapDelay,
-  uint16_t maxDelay,
-  uint32_t blockSize);
+        arm_fir_sparse_instance_f32 * S,
+        uint16_t numTaps,
+  const float32_t * pCoeffs,
+        float32_t * pState,
+        int32_t * pTapDelay,
+        uint16_t maxDelay,
+        uint32_t blockSize);
 
 
   /**
@@ -4503,11 +4670,11 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize   number of input samples to process per call.
    */
   void arm_fir_sparse_q31(
-  arm_fir_sparse_instance_q31 * S,
-  q31_t * pSrc,
-  q31_t * pDst,
-  q31_t * pScratchIn,
-  uint32_t blockSize);
+        arm_fir_sparse_instance_q31 * S,
+  const q31_t * pSrc,
+        q31_t * pDst,
+        q31_t * pScratchIn,
+        uint32_t blockSize);
 
 
   /**
@@ -4521,13 +4688,13 @@ void arm_rfft_fast_f32(
    * @param[in]     blockSize  number of samples that will be processed per block.
    */
   void arm_fir_sparse_init_q31(
-  arm_fir_sparse_instance_q31 * S,
-  uint16_t numTaps,
-  q31_t * pCoeffs,
-  q31_t * pState,
-  int32_t * pTapDelay,
-  uint16_t maxDelay,
-  uint32_t blockSize);
+        arm_fir_sparse_instance_q31 * S,
+        uint16_t numTaps,
+  const q31_t * pCoeffs,
+        q31_t * pState,
+        int32_t * pTapDelay,
+        uint16_t maxDelay,
+        uint32_t blockSize);
 
 
   /**
@@ -4540,12 +4707,12 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize    number of input samples to process per call.
    */
   void arm_fir_sparse_q15(
-  arm_fir_sparse_instance_q15 * S,
-  q15_t * pSrc,
-  q15_t * pDst,
-  q15_t * pScratchIn,
-  q31_t * pScratchOut,
-  uint32_t blockSize);
+        arm_fir_sparse_instance_q15 * S,
+  const q15_t * pSrc,
+        q15_t * pDst,
+        q15_t * pScratchIn,
+        q31_t * pScratchOut,
+        uint32_t blockSize);
 
 
   /**
@@ -4559,13 +4726,13 @@ void arm_rfft_fast_f32(
    * @param[in]     blockSize  number of samples that will be processed per block.
    */
   void arm_fir_sparse_init_q15(
-  arm_fir_sparse_instance_q15 * S,
-  uint16_t numTaps,
-  q15_t * pCoeffs,
-  q15_t * pState,
-  int32_t * pTapDelay,
-  uint16_t maxDelay,
-  uint32_t blockSize);
+        arm_fir_sparse_instance_q15 * S,
+        uint16_t numTaps,
+  const q15_t * pCoeffs,
+        q15_t * pState,
+        int32_t * pTapDelay,
+        uint16_t maxDelay,
+        uint32_t blockSize);
 
 
   /**
@@ -4578,12 +4745,12 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize    number of input samples to process per call.
    */
   void arm_fir_sparse_q7(
-  arm_fir_sparse_instance_q7 * S,
-  q7_t * pSrc,
-  q7_t * pDst,
-  q7_t * pScratchIn,
-  q31_t * pScratchOut,
-  uint32_t blockSize);
+        arm_fir_sparse_instance_q7 * S,
+  const q7_t * pSrc,
+        q7_t * pDst,
+        q7_t * pScratchIn,
+        q31_t * pScratchOut,
+        uint32_t blockSize);
 
 
   /**
@@ -4597,13 +4764,13 @@ void arm_rfft_fast_f32(
    * @param[in]     blockSize  number of samples that will be processed per block.
    */
   void arm_fir_sparse_init_q7(
-  arm_fir_sparse_instance_q7 * S,
-  uint16_t numTaps,
-  q7_t * pCoeffs,
-  q7_t * pState,
-  int32_t * pTapDelay,
-  uint16_t maxDelay,
-  uint32_t blockSize);
+        arm_fir_sparse_instance_q7 * S,
+        uint16_t numTaps,
+  const q7_t * pCoeffs,
+        q7_t * pState,
+        int32_t * pTapDelay,
+        uint16_t maxDelay,
+        uint32_t blockSize);
 
 
   /**
@@ -4613,9 +4780,9 @@ void arm_rfft_fast_f32(
    * @param[out] pCosVal  points to the processed cos output.
    */
   void arm_sin_cos_f32(
-  float32_t theta,
-  float32_t * pSinVal,
-  float32_t * pCosVal);
+        float32_t theta,
+        float32_t * pSinVal,
+        float32_t * pCosVal);
 
 
   /**
@@ -4625,9 +4792,9 @@ void arm_rfft_fast_f32(
    * @param[out] pCosVal  points to the processed cosine output.
    */
   void arm_sin_cos_q31(
-  q31_t theta,
-  q31_t * pSinVal,
-  q31_t * pCosVal);
+        q31_t theta,
+        q31_t * pSinVal,
+        q31_t * pCosVal);
 
 
   /**
@@ -4637,9 +4804,9 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of complex samples in each vector
    */
   void arm_cmplx_conj_f32(
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t numSamples);
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t numSamples);
 
   /**
    * @brief  Q31 complex conjugate.
@@ -4648,9 +4815,9 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of complex samples in each vector
    */
   void arm_cmplx_conj_q31(
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t numSamples);
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t numSamples);
 
 
   /**
@@ -4660,9 +4827,9 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of complex samples in each vector
    */
   void arm_cmplx_conj_q15(
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t numSamples);
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t numSamples);
 
 
   /**
@@ -4672,9 +4839,9 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of complex samples in the input vector
    */
   void arm_cmplx_mag_squared_f32(
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t numSamples);
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t numSamples);
 
 
   /**
@@ -4684,9 +4851,9 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of complex samples in the input vector
    */
   void arm_cmplx_mag_squared_q31(
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t numSamples);
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t numSamples);
 
 
   /**
@@ -4696,9 +4863,9 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of complex samples in the input vector
    */
   void arm_cmplx_mag_squared_q15(
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t numSamples);
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t numSamples);
 
 
  /**
@@ -4723,7 +4890,8 @@ void arm_rfft_fast_f32(
    *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
    *    A0 = Kp + Ki + Kd
    *    A1 = (-Kp ) - (2 * Kd )
-   *    A2 = Kd  </pre>
+   *    A2 = Kd
+   * </pre>
    *
    * \par
    * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant
@@ -4768,12 +4936,12 @@ void arm_rfft_fast_f32(
    */
 
   /**
-   * @brief  Process function for the floating-point PID Control.
+   * @brief         Process function for the floating-point PID Control.
    * @param[in,out] S   is an instance of the floating-point PID Control structure
    * @param[in]     in  input sample to process
-   * @return out processed output sample.
+   * @return        processed output sample.
    */
-  CMSIS_INLINE __STATIC_INLINE float32_t arm_pid_f32(
+  __STATIC_FORCEINLINE float32_t arm_pid_f32(
   arm_pid_instance_f32 * S,
   float32_t in)
   {
@@ -4793,21 +4961,20 @@ void arm_rfft_fast_f32(
 
   }
 
-  /**
-   * @brief  Process function for the Q31 PID Control.
-   * @param[in,out] S  points to an instance of the Q31 PID Control structure
-   * @param[in]     in  input sample to process
-   * @return out processed output sample.
-   *
-   * <b>Scaling and Overflow Behavior:</b>
-   * \par
-   * The function is implemented using an internal 64-bit accumulator.
-   * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
-   * Thus, if the accumulator result overflows it wraps around rather than clip.
-   * In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
-   * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
-   */
-  CMSIS_INLINE __STATIC_INLINE q31_t arm_pid_q31(
+/**
+  @brief         Process function for the Q31 PID Control.
+  @param[in,out] S  points to an instance of the Q31 PID Control structure
+  @param[in]     in  input sample to process
+  @return        processed output sample.
+
+  \par Scaling and Overflow Behavior
+         The function is implemented using an internal 64-bit accumulator.
+         The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
+         Thus, if the accumulator result overflows it wraps around rather than clipping.
+         In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
+         After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
+ */
+__STATIC_FORCEINLINE q31_t arm_pid_q31(
   arm_pid_instance_q31 * S,
   q31_t in)
   {
@@ -4839,22 +5006,21 @@ void arm_rfft_fast_f32(
   }
 
 
-  /**
-   * @brief  Process function for the Q15 PID Control.
-   * @param[in,out] S   points to an instance of the Q15 PID Control structure
-   * @param[in]     in  input sample to process
-   * @return out processed output sample.
-   *
-   * <b>Scaling and Overflow Behavior:</b>
-   * \par
-   * The function is implemented using a 64-bit internal accumulator.
-   * Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
-   * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
-   * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
-   * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
-   * Lastly, the accumulator is saturated to yield a result in 1.15 format.
-   */
-  CMSIS_INLINE __STATIC_INLINE q15_t arm_pid_q15(
+/**
+  @brief         Process function for the Q15 PID Control.
+  @param[in,out] S   points to an instance of the Q15 PID Control structure
+  @param[in]     in  input sample to process
+  @return        processed output sample.
+
+  \par Scaling and Overflow Behavior
+         The function is implemented using a 64-bit internal accumulator.
+         Both gains and state variables are represented in 1.15 format, and multiplications yield a 2.30 result.
+         The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
+         There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
+         After all additions have been performed, the accumulator is truncated to 34.15 format by discarding the low 15 bits.
+         Lastly, the accumulator is saturated to yield a result in 1.15 format.
+ */
+__STATIC_FORCEINLINE q15_t arm_pid_q15(
   arm_pid_instance_q15 * S,
   q15_t in)
   {
@@ -4862,16 +5028,13 @@ void arm_rfft_fast_f32(
     q15_t out;
 
 #if defined (ARM_MATH_DSP)
-    __SIMD32_TYPE *vstate;
-
     /* Implementation of PID controller */
 
     /* acc = A0 * x[n]  */
     acc = (q31_t) __SMUAD((uint32_t)S->A0, (uint32_t)in);
 
     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
-    vstate = __SIMD32_CONST(S->state);
-    acc = (q63_t)__SMLALD((uint32_t)S->A1, (uint32_t)*vstate, (uint64_t)acc);
+    acc = (q63_t)__SMLALD((uint32_t)S->A1, (uint32_t)read_q15x2 (S->state), (uint64_t)acc);
 #else
     /* acc = A0 * x[n]  */
     acc = ((q31_t) S->A0) * in;
@@ -4964,8 +5127,9 @@ void arm_rfft_fast_f32(
    * @param[in]  Ib       input three-phase coordinate <code>b</code>
    * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
    * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
+   * @return        none
    */
-  CMSIS_INLINE __STATIC_INLINE void arm_clarke_f32(
+  __STATIC_FORCEINLINE void arm_clarke_f32(
   float32_t Ia,
   float32_t Ib,
   float32_t * pIalpha,
@@ -4979,20 +5143,20 @@ void arm_rfft_fast_f32(
   }
 
 
-  /**
-   * @brief  Clarke transform for Q31 version
-   * @param[in]  Ia       input three-phase coordinate <code>a</code>
-   * @param[in]  Ib       input three-phase coordinate <code>b</code>
-   * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
-   * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
-   *
-   * <b>Scaling and Overflow Behavior:</b>
-   * \par
-   * The function is implemented using an internal 32-bit accumulator.
-   * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
-   * There is saturation on the addition, hence there is no risk of overflow.
-   */
-  CMSIS_INLINE __STATIC_INLINE void arm_clarke_q31(
+/**
+  @brief  Clarke transform for Q31 version
+  @param[in]  Ia       input three-phase coordinate <code>a</code>
+  @param[in]  Ib       input three-phase coordinate <code>b</code>
+  @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
+  @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
+  @return     none
+
+  \par Scaling and Overflow Behavior
+         The function is implemented using an internal 32-bit accumulator.
+         The accumulator maintains 1.31 format by truncating the lower 31 bits of the intermediate multiplication in 2.62 format.
+         There is saturation on the addition, hence there is no risk of overflow.
+ */
+__STATIC_FORCEINLINE void arm_clarke_q31(
   q31_t Ia,
   q31_t Ib,
   q31_t * pIalpha,
@@ -5017,18 +5181,6 @@ void arm_rfft_fast_f32(
    * @} end of clarke group
    */
 
-  /**
-   * @brief  Converts the elements of the Q7 vector to Q31 vector.
-   * @param[in]  pSrc       input pointer
-   * @param[out] pDst       output pointer
-   * @param[in]  blockSize  number of samples to process
-   */
-  void arm_q7_to_q31(
-  q7_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
-
-
 
   /**
    * @ingroup groupController
@@ -5061,8 +5213,9 @@ void arm_rfft_fast_f32(
    * @param[in]  Ibeta   input two-phase orthogonal vector axis beta
    * @param[out] pIa     points to output three-phase coordinate <code>a</code>
    * @param[out] pIb     points to output three-phase coordinate <code>b</code>
+   * @return     none
    */
-  CMSIS_INLINE __STATIC_INLINE void arm_inv_clarke_f32(
+  __STATIC_FORCEINLINE void arm_inv_clarke_f32(
   float32_t Ialpha,
   float32_t Ibeta,
   float32_t * pIa,
@@ -5076,20 +5229,20 @@ void arm_rfft_fast_f32(
   }
 
 
-  /**
-   * @brief  Inverse Clarke transform for Q31 version
-   * @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
-   * @param[in]  Ibeta   input two-phase orthogonal vector axis beta
-   * @param[out] pIa     points to output three-phase coordinate <code>a</code>
-   * @param[out] pIb     points to output three-phase coordinate <code>b</code>
-   *
-   * <b>Scaling and Overflow Behavior:</b>
-   * \par
-   * The function is implemented using an internal 32-bit accumulator.
-   * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
-   * There is saturation on the subtraction, hence there is no risk of overflow.
-   */
-  CMSIS_INLINE __STATIC_INLINE void arm_inv_clarke_q31(
+/**
+  @brief  Inverse Clarke transform for Q31 version
+  @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
+  @param[in]  Ibeta   input two-phase orthogonal vector axis beta
+  @param[out] pIa     points to output three-phase coordinate <code>a</code>
+  @param[out] pIb     points to output three-phase coordinate <code>b</code>
+  @return     none
+
+  \par Scaling and Overflow Behavior
+         The function is implemented using an internal 32-bit accumulator.
+         The accumulator maintains 1.31 format by truncating the lower 31 bits of the intermediate multiplication in 2.62 format.
+         There is saturation on the subtraction, hence there is no risk of overflow.
+ */
+__STATIC_FORCEINLINE void arm_inv_clarke_q31(
   q31_t Ialpha,
   q31_t Ibeta,
   q31_t * pIa,
@@ -5114,17 +5267,6 @@ void arm_rfft_fast_f32(
    * @} end of inv_clarke group
    */
 
-  /**
-   * @brief  Converts the elements of the Q7 vector to Q15 vector.
-   * @param[in]  pSrc       input pointer
-   * @param[out] pDst       output pointer
-   * @param[in]  blockSize  number of samples to process
-   */
-  void arm_q7_to_q15(
-  q7_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
-
 
 
   /**
@@ -5168,11 +5310,12 @@ void arm_rfft_fast_f32(
    * @param[out] pIq     points to output   rotor reference frame q
    * @param[in]  sinVal  sine value of rotation angle theta
    * @param[in]  cosVal  cosine value of rotation angle theta
+   * @return     none
    *
    * The function implements the forward Park transform.
    *
    */
-  CMSIS_INLINE __STATIC_INLINE void arm_park_f32(
+  __STATIC_FORCEINLINE void arm_park_f32(
   float32_t Ialpha,
   float32_t Ibeta,
   float32_t * pId,
@@ -5188,22 +5331,22 @@ void arm_rfft_fast_f32(
   }
 
 
-  /**
-   * @brief  Park transform for Q31 version
-   * @param[in]  Ialpha  input two-phase vector coordinate alpha
-   * @param[in]  Ibeta   input two-phase vector coordinate beta
-   * @param[out] pId     points to output rotor reference frame d
-   * @param[out] pIq     points to output rotor reference frame q
-   * @param[in]  sinVal  sine value of rotation angle theta
-   * @param[in]  cosVal  cosine value of rotation angle theta
-   *
-   * <b>Scaling and Overflow Behavior:</b>
-   * \par
-   * The function is implemented using an internal 32-bit accumulator.
-   * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
-   * There is saturation on the addition and subtraction, hence there is no risk of overflow.
-   */
-  CMSIS_INLINE __STATIC_INLINE void arm_park_q31(
+/**
+  @brief  Park transform for Q31 version
+  @param[in]  Ialpha  input two-phase vector coordinate alpha
+  @param[in]  Ibeta   input two-phase vector coordinate beta
+  @param[out] pId     points to output rotor reference frame d
+  @param[out] pIq     points to output rotor reference frame q
+  @param[in]  sinVal  sine value of rotation angle theta
+  @param[in]  cosVal  cosine value of rotation angle theta
+  @return     none
+
+  \par Scaling and Overflow Behavior
+         The function is implemented using an internal 32-bit accumulator.
+         The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
+         There is saturation on the addition and subtraction, hence there is no risk of overflow.
+ */
+__STATIC_FORCEINLINE void arm_park_q31(
   q31_t Ialpha,
   q31_t Ibeta,
   q31_t * pId,
@@ -5238,17 +5381,6 @@ void arm_rfft_fast_f32(
    * @} end of park group
    */
 
-  /**
-   * @brief  Converts the elements of the Q7 vector to floating-point vector.
-   * @param[in]  pSrc       is input pointer
-   * @param[out] pDst       is output pointer
-   * @param[in]  blockSize  is the number of samples to process
-   */
-  void arm_q7_to_float(
-  q7_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
-
 
   /**
    * @ingroup groupController
@@ -5284,8 +5416,9 @@ void arm_rfft_fast_f32(
    * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
    * @param[in]  sinVal   sine value of rotation angle theta
    * @param[in]  cosVal   cosine value of rotation angle theta
+   * @return     none
    */
-  CMSIS_INLINE __STATIC_INLINE void arm_inv_park_f32(
+  __STATIC_FORCEINLINE void arm_inv_park_f32(
   float32_t Id,
   float32_t Iq,
   float32_t * pIalpha,
@@ -5301,22 +5434,22 @@ void arm_rfft_fast_f32(
   }
 
 
-  /**
-   * @brief  Inverse Park transform for   Q31 version
-   * @param[in]  Id       input coordinate of rotor reference frame d
-   * @param[in]  Iq       input coordinate of rotor reference frame q
-   * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
-   * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
-   * @param[in]  sinVal   sine value of rotation angle theta
-   * @param[in]  cosVal   cosine value of rotation angle theta
-   *
-   * <b>Scaling and Overflow Behavior:</b>
-   * \par
-   * The function is implemented using an internal 32-bit accumulator.
-   * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
-   * There is saturation on the addition, hence there is no risk of overflow.
-   */
-  CMSIS_INLINE __STATIC_INLINE void arm_inv_park_q31(
+/**
+  @brief  Inverse Park transform for Q31 version
+  @param[in]  Id       input coordinate of rotor reference frame d
+  @param[in]  Iq       input coordinate of rotor reference frame q
+  @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
+  @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
+  @param[in]  sinVal   sine value of rotation angle theta
+  @param[in]  cosVal   cosine value of rotation angle theta
+  @return     none
+
+  @par Scaling and Overflow Behavior
+         The function is implemented using an internal 32-bit accumulator.
+         The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
+         There is saturation on the addition, hence there is no risk of overflow.
+ */
+__STATIC_FORCEINLINE void arm_inv_park_q31(
   q31_t Id,
   q31_t Iq,
   q31_t * pIalpha,
@@ -5352,17 +5485,6 @@ void arm_rfft_fast_f32(
    */
 
 
-  /**
-   * @brief  Converts the elements of the Q31 vector to floating-point vector.
-   * @param[in]  pSrc       is input pointer
-   * @param[out] pDst       is output pointer
-   * @param[in]  blockSize  is the number of samples to process
-   */
-  void arm_q31_to_float(
-  q31_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
-
   /**
    * @ingroup groupInterpolation
    */
@@ -5411,7 +5533,7 @@ void arm_rfft_fast_f32(
    * @return y processed output sample.
    *
    */
-  CMSIS_INLINE __STATIC_INLINE float32_t arm_linear_interp_f32(
+  __STATIC_FORCEINLINE float32_t arm_linear_interp_f32(
   arm_linear_interp_instance_f32 * S,
   float32_t x)
   {
@@ -5468,7 +5590,7 @@ void arm_rfft_fast_f32(
    * This function can support maximum of table size 2^12.
    *
    */
-  CMSIS_INLINE __STATIC_INLINE q31_t arm_linear_interp_q31(
+  __STATIC_FORCEINLINE q31_t arm_linear_interp_q31(
   q31_t * pYData,
   q31_t x,
   uint32_t nValues)
@@ -5526,7 +5648,7 @@ void arm_rfft_fast_f32(
    * This function can support maximum of table size 2^12.
    *
    */
-  CMSIS_INLINE __STATIC_INLINE q15_t arm_linear_interp_q15(
+  __STATIC_FORCEINLINE q15_t arm_linear_interp_q15(
   q15_t * pYData,
   q31_t x,
   uint32_t nValues)
@@ -5583,7 +5705,7 @@ void arm_rfft_fast_f32(
    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
    * This function can support maximum of table size 2^12.
    */
-  CMSIS_INLINE __STATIC_INLINE q7_t arm_linear_interp_q7(
+  __STATIC_FORCEINLINE q7_t arm_linear_interp_q7(
   q7_t * pYData,
   q31_t x,
   uint32_t nValues)
@@ -5716,28 +5838,34 @@ void arm_rfft_fast_f32(
    * @{
    */
 
-  /**
-   * @brief  Floating-point square root function.
-   * @param[in]  in    input value.
-   * @param[out] pOut  square root of input value.
-   * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
-   * <code>in</code> is negative value and returns zero output for negative values.
-   */
-  CMSIS_INLINE __STATIC_INLINE arm_status arm_sqrt_f32(
+/**
+  @brief         Floating-point square root function.
+  @param[in]     in    input value
+  @param[out]    pOut  square root of input value
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : input value is non-negative
+                   - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
+ */
+__STATIC_FORCEINLINE arm_status arm_sqrt_f32(
   float32_t in,
   float32_t * pOut)
   {
     if (in >= 0.0f)
     {
-
-#if   (__FPU_USED == 1) && defined ( __CC_ARM   )
+#if defined ( __CC_ARM )
+  #if defined __TARGET_FPU_VFP
       *pOut = __sqrtf(in);
-#elif (__FPU_USED == 1) && (defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050))
-      *pOut = __builtin_sqrtf(in);
-#elif (__FPU_USED == 1) && defined(__GNUC__)
-      *pOut = __builtin_sqrtf(in);
-#elif (__FPU_USED == 1) && defined ( __ICCARM__ ) && (__VER__ >= 6040000)
+  #else
+      *pOut = sqrtf(in);
+  #endif
+
+#elif defined ( __ICCARM__ )
+  #if defined __ARMVFP__
       __ASM("VSQRT.F32 %0,%1" : "=t"(*pOut) : "t"(in));
+  #else
+      *pOut = sqrtf(in);
+  #endif
+
 #else
       *pOut = sqrtf(in);
 #endif
@@ -5752,28 +5880,53 @@ void arm_rfft_fast_f32(
   }
 
 
-  /**
-   * @brief Q31 square root function.
-   * @param[in]  in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
-   * @param[out] pOut  square root of input value.
-   * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
-   * <code>in</code> is negative value and returns zero output for negative values.
-   */
-  arm_status arm_sqrt_q31(
+/**
+  @brief         Q31 square root function.
+  @param[in]     in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF
+  @param[out]    pOut  points to square root of input value
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : input value is positive
+                   - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
+ */
+arm_status arm_sqrt_q31(
   q31_t in,
   q31_t * pOut);
 
 
+/**
+  @brief         Q15 square root function.
+  @param[in]     in    input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF
+  @param[out]    pOut  points to square root of input value
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : input value is positive
+                   - \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
+ */
+arm_status arm_sqrt_q15(
+  q15_t in,
+  q15_t * pOut);
+
   /**
-   * @brief  Q15 square root function.
-   * @param[in]  in    input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
-   * @param[out] pOut  square root of input value.
+   * @brief  Vector Floating-point square root function.
+   * @param[in]  pIn   input vector.
+   * @param[out] pOut  vector of square roots of input elements.
+   * @param[in]  len   length of input vector.
-   * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
-   * <code>in</code> is negative value and returns zero output for negative values.
+   * @return none
+   *
    */
-  arm_status arm_sqrt_q15(
-  q15_t in,
-  q15_t * pOut);
+  void arm_vsqrt_f32(
+  float32_t * pIn,
+  float32_t * pOut,
+  uint16_t len);
+
+  void arm_vsqrt_q31(
+  q31_t * pIn,
+  q31_t * pOut,
+  uint16_t len);
+
+  void arm_vsqrt_q15(
+  q15_t * pIn,
+  q15_t * pOut,
+  uint16_t len);
 
   /**
    * @} end of SQRT group
@@ -5783,7 +5936,7 @@ void arm_rfft_fast_f32(
   /**
    * @brief floating-point Circular write function.
    */
-  CMSIS_INLINE __STATIC_INLINE void arm_circularWrite_f32(
+  __STATIC_FORCEINLINE void arm_circularWrite_f32(
   int32_t * circBuffer,
   int32_t L,
   uint16_t * writeOffset,
@@ -5828,7 +5981,7 @@ void arm_rfft_fast_f32(
   /**
    * @brief floating-point Circular Read function.
    */
-  CMSIS_INLINE __STATIC_INLINE void arm_circularRead_f32(
+  __STATIC_FORCEINLINE void arm_circularRead_f32(
   int32_t * circBuffer,
   int32_t L,
   int32_t * readOffset,
@@ -5840,12 +5993,13 @@ void arm_rfft_fast_f32(
   uint32_t blockSize)
   {
     uint32_t i = 0U;
-    int32_t rOffset, dst_end;
+    int32_t rOffset;
+    int32_t* dst_end;
 
     /* Copy the value of Index pointer that points
      * to the current location from where the input samples to be read */
     rOffset = *readOffset;
-    dst_end = (int32_t) (dst_base + dst_length);
+    dst_end = dst_base + dst_length;
 
     /* Loop over the blockSize */
     i = blockSize;
@@ -5858,7 +6012,7 @@ void arm_rfft_fast_f32(
       /* Update the input pointer */
       dst += dstInc;
 
-      if (dst == (int32_t *) dst_end)
+      if (dst == dst_end)
       {
         dst = dst_base;
       }
@@ -5883,7 +6037,7 @@ void arm_rfft_fast_f32(
   /**
    * @brief Q15 Circular write function.
    */
-  CMSIS_INLINE __STATIC_INLINE void arm_circularWrite_q15(
+  __STATIC_FORCEINLINE void arm_circularWrite_q15(
   q15_t * circBuffer,
   int32_t L,
   uint16_t * writeOffset,
@@ -5927,7 +6081,7 @@ void arm_rfft_fast_f32(
   /**
    * @brief Q15 Circular Read function.
    */
-  CMSIS_INLINE __STATIC_INLINE void arm_circularRead_q15(
+  __STATIC_FORCEINLINE void arm_circularRead_q15(
   q15_t * circBuffer,
   int32_t L,
   int32_t * readOffset,
@@ -5939,13 +6093,14 @@ void arm_rfft_fast_f32(
   uint32_t blockSize)
   {
     uint32_t i = 0;
-    int32_t rOffset, dst_end;
+    int32_t rOffset;
+    q15_t* dst_end;
 
     /* Copy the value of Index pointer that points
      * to the current location from where the input samples to be read */
     rOffset = *readOffset;
 
-    dst_end = (int32_t) (dst_base + dst_length);
+    dst_end = dst_base + dst_length;
 
     /* Loop over the blockSize */
     i = blockSize;
@@ -5958,7 +6113,7 @@ void arm_rfft_fast_f32(
       /* Update the input pointer */
       dst += dstInc;
 
-      if (dst == (q15_t *) dst_end)
+      if (dst == dst_end)
       {
         dst = dst_base;
       }
@@ -5983,7 +6138,7 @@ void arm_rfft_fast_f32(
   /**
    * @brief Q7 Circular write function.
    */
-  CMSIS_INLINE __STATIC_INLINE void arm_circularWrite_q7(
+  __STATIC_FORCEINLINE void arm_circularWrite_q7(
   q7_t * circBuffer,
   int32_t L,
   uint16_t * writeOffset,
@@ -6027,7 +6182,7 @@ void arm_rfft_fast_f32(
   /**
    * @brief Q7 Circular Read function.
    */
-  CMSIS_INLINE __STATIC_INLINE void arm_circularRead_q7(
+  __STATIC_FORCEINLINE void arm_circularRead_q7(
   q7_t * circBuffer,
   int32_t L,
   int32_t * readOffset,
@@ -6039,13 +6194,14 @@ void arm_rfft_fast_f32(
   uint32_t blockSize)
   {
     uint32_t i = 0;
-    int32_t rOffset, dst_end;
+    int32_t rOffset;
+    q7_t* dst_end;
 
     /* Copy the value of Index pointer that points
      * to the current location from where the input samples to be read */
     rOffset = *readOffset;
 
-    dst_end = (int32_t) (dst_base + dst_length);
+    dst_end = dst_base + dst_length;
 
     /* Loop over the blockSize */
     i = blockSize;
@@ -6058,7 +6214,7 @@ void arm_rfft_fast_f32(
       /* Update the input pointer */
       dst += dstInc;
 
-      if (dst == (q7_t *) dst_end)
+      if (dst == dst_end)
       {
         dst = dst_base;
       }
@@ -6087,9 +6243,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_power_q31(
-  q31_t * pSrc,
-  uint32_t blockSize,
-  q63_t * pResult);
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q63_t * pResult);
 
 
   /**
@@ -6099,9 +6255,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_power_f32(
-  float32_t * pSrc,
-  uint32_t blockSize,
-  float32_t * pResult);
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
 
 
   /**
@@ -6111,9 +6267,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_power_q15(
-  q15_t * pSrc,
-  uint32_t blockSize,
-  q63_t * pResult);
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q63_t * pResult);
 
 
   /**
@@ -6123,9 +6279,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_power_q7(
-  q7_t * pSrc,
-  uint32_t blockSize,
-  q31_t * pResult);
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
 
 
   /**
@@ -6135,9 +6291,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_mean_q7(
-  q7_t * pSrc,
-  uint32_t blockSize,
-  q7_t * pResult);
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult);
 
 
   /**
@@ -6147,9 +6303,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_mean_q15(
-  q15_t * pSrc,
-  uint32_t blockSize,
-  q15_t * pResult);
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
 
 
   /**
@@ -6159,9 +6315,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_mean_q31(
-  q31_t * pSrc,
-  uint32_t blockSize,
-  q31_t * pResult);
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
 
 
   /**
@@ -6171,9 +6327,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_mean_f32(
-  float32_t * pSrc,
-  uint32_t blockSize,
-  float32_t * pResult);
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
 
 
   /**
@@ -6183,9 +6339,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_var_f32(
-  float32_t * pSrc,
-  uint32_t blockSize,
-  float32_t * pResult);
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
 
 
   /**
@@ -6195,9 +6351,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_var_q31(
-  q31_t * pSrc,
-  uint32_t blockSize,
-  q31_t * pResult);
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
 
 
   /**
@@ -6207,9 +6363,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_var_q15(
-  q15_t * pSrc,
-  uint32_t blockSize,
-  q15_t * pResult);
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
 
 
   /**
@@ -6219,9 +6375,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_rms_f32(
-  float32_t * pSrc,
-  uint32_t blockSize,
-  float32_t * pResult);
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
 
 
   /**
@@ -6231,9 +6387,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_rms_q31(
-  q31_t * pSrc,
-  uint32_t blockSize,
-  q31_t * pResult);
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
 
 
   /**
@@ -6243,9 +6399,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_rms_q15(
-  q15_t * pSrc,
-  uint32_t blockSize,
-  q15_t * pResult);
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
 
 
   /**
@@ -6255,9 +6411,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_std_f32(
-  float32_t * pSrc,
-  uint32_t blockSize,
-  float32_t * pResult);
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult);
 
 
   /**
@@ -6267,9 +6423,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_std_q31(
-  q31_t * pSrc,
-  uint32_t blockSize,
-  q31_t * pResult);
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult);
 
 
   /**
@@ -6279,9 +6435,9 @@ void arm_rfft_fast_f32(
    * @param[out] pResult    is output value.
    */
   void arm_std_q15(
-  q15_t * pSrc,
-  uint32_t blockSize,
-  q15_t * pResult);
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult);
 
 
   /**
@@ -6291,9 +6447,9 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of complex samples in the input vector
    */
   void arm_cmplx_mag_f32(
-  float32_t * pSrc,
-  float32_t * pDst,
-  uint32_t numSamples);
+  const float32_t * pSrc,
+        float32_t * pDst,
+        uint32_t numSamples);
 
 
   /**
@@ -6303,9 +6459,9 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of complex samples in the input vector
    */
   void arm_cmplx_mag_q31(
-  q31_t * pSrc,
-  q31_t * pDst,
-  uint32_t numSamples);
+  const q31_t * pSrc,
+        q31_t * pDst,
+        uint32_t numSamples);
 
 
   /**
@@ -6315,9 +6471,9 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of complex samples in the input vector
    */
   void arm_cmplx_mag_q15(
-  q15_t * pSrc,
-  q15_t * pDst,
-  uint32_t numSamples);
+  const q15_t * pSrc,
+        q15_t * pDst,
+        uint32_t numSamples);
 
 
   /**
@@ -6329,11 +6485,11 @@ void arm_rfft_fast_f32(
    * @param[out] imagResult  imaginary part of the result returned here
    */
   void arm_cmplx_dot_prod_q15(
-  q15_t * pSrcA,
-  q15_t * pSrcB,
-  uint32_t numSamples,
-  q31_t * realResult,
-  q31_t * imagResult);
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        uint32_t numSamples,
+        q31_t * realResult,
+        q31_t * imagResult);
 
 
   /**
@@ -6345,11 +6501,11 @@ void arm_rfft_fast_f32(
    * @param[out] imagResult  imaginary part of the result returned here
    */
   void arm_cmplx_dot_prod_q31(
-  q31_t * pSrcA,
-  q31_t * pSrcB,
-  uint32_t numSamples,
-  q63_t * realResult,
-  q63_t * imagResult);
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        uint32_t numSamples,
+        q63_t * realResult,
+        q63_t * imagResult);
 
 
   /**
@@ -6361,11 +6517,11 @@ void arm_rfft_fast_f32(
    * @param[out] imagResult  imaginary part of the result returned here
    */
   void arm_cmplx_dot_prod_f32(
-  float32_t * pSrcA,
-  float32_t * pSrcB,
-  uint32_t numSamples,
-  float32_t * realResult,
-  float32_t * imagResult);
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        uint32_t numSamples,
+        float32_t * realResult,
+        float32_t * imagResult);
 
 
   /**
@@ -6376,10 +6532,10 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of samples in each vector
    */
   void arm_cmplx_mult_real_q15(
-  q15_t * pSrcCmplx,
-  q15_t * pSrcReal,
-  q15_t * pCmplxDst,
-  uint32_t numSamples);
+  const q15_t * pSrcCmplx,
+  const q15_t * pSrcReal,
+        q15_t * pCmplxDst,
+        uint32_t numSamples);
 
 
   /**
@@ -6390,10 +6546,10 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of samples in each vector
    */
   void arm_cmplx_mult_real_q31(
-  q31_t * pSrcCmplx,
-  q31_t * pSrcReal,
-  q31_t * pCmplxDst,
-  uint32_t numSamples);
+  const q31_t * pSrcCmplx,
+  const q31_t * pSrcReal,
+        q31_t * pCmplxDst,
+        uint32_t numSamples);
 
 
   /**
@@ -6404,10 +6560,10 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of samples in each vector
    */
   void arm_cmplx_mult_real_f32(
-  float32_t * pSrcCmplx,
-  float32_t * pSrcReal,
-  float32_t * pCmplxDst,
-  uint32_t numSamples);
+  const float32_t * pSrcCmplx,
+  const float32_t * pSrcReal,
+        float32_t * pCmplxDst,
+        uint32_t numSamples);
 
 
   /**
@@ -6418,10 +6574,10 @@ void arm_rfft_fast_f32(
-   * @param[in]  index      is the array index of the minimum value in the input buffer.
+   * @param[out] index      is the array index of the minimum value in the input buffer.
    */
   void arm_min_q7(
-  q7_t * pSrc,
-  uint32_t blockSize,
-  q7_t * result,
-  uint32_t * index);
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * result,
+        uint32_t * index);
 
 
   /**
@@ -6432,10 +6588,10 @@ void arm_rfft_fast_f32(
-   * @param[in]  pIndex     is the array index of the minimum value in the input buffer.
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
    */
   void arm_min_q15(
-  q15_t * pSrc,
-  uint32_t blockSize,
-  q15_t * pResult,
-  uint32_t * pIndex);
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex);
 
 
   /**
@@ -6446,10 +6602,10 @@ void arm_rfft_fast_f32(
    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
    */
   void arm_min_q31(
-  q31_t * pSrc,
-  uint32_t blockSize,
-  q31_t * pResult,
-  uint32_t * pIndex);
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex);
 
 
   /**
@@ -6460,10 +6616,10 @@ void arm_rfft_fast_f32(
    * @param[out] pIndex     is the array index of the minimum value in the input buffer.
    */
   void arm_min_f32(
-  float32_t * pSrc,
-  uint32_t blockSize,
-  float32_t * pResult,
-  uint32_t * pIndex);
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex);
 
 
 /**
@@ -6474,10 +6630,10 @@ void arm_rfft_fast_f32(
  * @param[out] pIndex     index of maximum value returned here
  */
   void arm_max_q7(
-  q7_t * pSrc,
-  uint32_t blockSize,
-  q7_t * pResult,
-  uint32_t * pIndex);
+  const q7_t * pSrc,
+        uint32_t blockSize,
+        q7_t * pResult,
+        uint32_t * pIndex);
 
 
 /**
@@ -6488,10 +6644,10 @@ void arm_rfft_fast_f32(
  * @param[out] pIndex     index of maximum value returned here
  */
   void arm_max_q15(
-  q15_t * pSrc,
-  uint32_t blockSize,
-  q15_t * pResult,
-  uint32_t * pIndex);
+  const q15_t * pSrc,
+        uint32_t blockSize,
+        q15_t * pResult,
+        uint32_t * pIndex);
 
 
 /**
@@ -6502,10 +6658,10 @@ void arm_rfft_fast_f32(
  * @param[out] pIndex     index of maximum value returned here
  */
   void arm_max_q31(
-  q31_t * pSrc,
-  uint32_t blockSize,
-  q31_t * pResult,
-  uint32_t * pIndex);
+  const q31_t * pSrc,
+        uint32_t blockSize,
+        q31_t * pResult,
+        uint32_t * pIndex);
 
 
 /**
@@ -6516,10 +6672,10 @@ void arm_rfft_fast_f32(
  * @param[out] pIndex     index of maximum value returned here
  */
   void arm_max_f32(
-  float32_t * pSrc,
-  uint32_t blockSize,
-  float32_t * pResult,
-  uint32_t * pIndex);
+  const float32_t * pSrc,
+        uint32_t blockSize,
+        float32_t * pResult,
+        uint32_t * pIndex);
 
 
   /**
@@ -6530,10 +6686,10 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of complex samples in each vector
    */
   void arm_cmplx_mult_cmplx_q15(
-  q15_t * pSrcA,
-  q15_t * pSrcB,
-  q15_t * pDst,
-  uint32_t numSamples);
+  const q15_t * pSrcA,
+  const q15_t * pSrcB,
+        q15_t * pDst,
+        uint32_t numSamples);
 
 
   /**
@@ -6544,10 +6700,10 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of complex samples in each vector
    */
   void arm_cmplx_mult_cmplx_q31(
-  q31_t * pSrcA,
-  q31_t * pSrcB,
-  q31_t * pDst,
-  uint32_t numSamples);
+  const q31_t * pSrcA,
+  const q31_t * pSrcB,
+        q31_t * pDst,
+        uint32_t numSamples);
 
 
   /**
@@ -6558,10 +6714,10 @@ void arm_rfft_fast_f32(
    * @param[in]  numSamples  number of complex samples in each vector
    */
   void arm_cmplx_mult_cmplx_f32(
-  float32_t * pSrcA,
-  float32_t * pSrcB,
-  float32_t * pDst,
-  uint32_t numSamples);
+  const float32_t * pSrcA,
+  const float32_t * pSrcB,
+        float32_t * pDst,
+        uint32_t numSamples);
 
 
   /**
@@ -6571,9 +6727,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  length of the input vector
    */
   void arm_float_to_q31(
-  float32_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -6583,9 +6739,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  length of the input vector
    */
   void arm_float_to_q15(
-  float32_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -6595,9 +6751,21 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  length of the input vector
    */
   void arm_float_to_q7(
-  float32_t * pSrc,
-  q7_t * pDst,
-  uint32_t blockSize);
+  const float32_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q31 vector to floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
+   */
+  void arm_q31_to_float(
+  const q31_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -6607,9 +6775,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  is the number of samples to process
    */
   void arm_q31_to_q15(
-  q31_t * pSrc,
-  q15_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -6619,9 +6787,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  is the number of samples to process
    */
   void arm_q31_to_q7(
-  q31_t * pSrc,
-  q7_t * pDst,
-  uint32_t blockSize);
+  const q31_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -6631,9 +6799,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  is the number of samples to process
    */
   void arm_q15_to_float(
-  q15_t * pSrc,
-  float32_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -6643,9 +6811,9 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  is the number of samples to process
    */
   void arm_q15_to_q31(
-  q15_t * pSrc,
-  q31_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -6655,9 +6823,45 @@ void arm_rfft_fast_f32(
    * @param[in]  blockSize  is the number of samples to process
    */
   void arm_q15_to_q7(
-  q15_t * pSrc,
-  q7_t * pDst,
-  uint32_t blockSize);
+  const q15_t * pSrc,
+        q7_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q7 vector to floating-point vector.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
+   */
+  void arm_q7_to_float(
+  const q7_t * pSrc,
+        float32_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q7 vector to Q31 vector.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void arm_q7_to_q31(
+  const q7_t * pSrc,
+        q31_t * pDst,
+        uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q7 vector to Q15 vector.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
+   */
+  void arm_q7_to_q15(
+  const q7_t * pSrc,
+        q15_t * pDst,
+        uint32_t blockSize);
 
 
   /**
@@ -6716,21 +6920,20 @@ void arm_rfft_fast_f32(
    * if (x,y) are outside of the table boundary, Bilinear interpolation returns zero output.
    */
 
+
   /**
    * @addtogroup BilinearInterpolate
    * @{
    */
 
-
   /**
-  *
   * @brief  Floating-point bilinear interpolation.
   * @param[in,out] S  points to an instance of the interpolation structure.
   * @param[in]     X  interpolation coordinate.
   * @param[in]     Y  interpolation coordinate.
   * @return out interpolated value.
   */
-  CMSIS_INLINE __STATIC_INLINE float32_t arm_bilinear_interp_f32(
+  __STATIC_FORCEINLINE float32_t arm_bilinear_interp_f32(
   const arm_bilinear_interp_instance_f32 * S,
   float32_t X,
   float32_t Y)
@@ -6789,14 +6992,13 @@ void arm_rfft_fast_f32(
 
 
   /**
-  *
   * @brief  Q31 bilinear interpolation.
   * @param[in,out] S  points to an instance of the interpolation structure.
   * @param[in]     X  interpolation coordinate in 12.20 format.
   * @param[in]     Y  interpolation coordinate in 12.20 format.
   * @return out interpolated value.
   */
-  CMSIS_INLINE __STATIC_INLINE q31_t arm_bilinear_interp_q31(
+  __STATIC_FORCEINLINE q31_t arm_bilinear_interp_q31(
   arm_bilinear_interp_instance_q31 * S,
   q31_t X,
   q31_t Y)
@@ -6870,7 +7072,7 @@ void arm_rfft_fast_f32(
   * @param[in]     Y  interpolation coordinate in 12.20 format.
   * @return out interpolated value.
   */
-  CMSIS_INLINE __STATIC_INLINE q15_t arm_bilinear_interp_q15(
+  __STATIC_FORCEINLINE q15_t arm_bilinear_interp_q15(
   arm_bilinear_interp_instance_q15 * S,
   q31_t X,
   q31_t Y)
@@ -6948,7 +7150,7 @@ void arm_rfft_fast_f32(
   * @param[in]     Y  interpolation coordinate in 12.20 format.
   * @return out interpolated value.
   */
-  CMSIS_INLINE __STATIC_INLINE q7_t arm_bilinear_interp_q7(
+  __STATIC_FORCEINLINE q7_t arm_bilinear_interp_q7(
   arm_bilinear_interp_instance_q7 * S,
   q31_t X,
   q31_t Y)
@@ -7046,7 +7248,7 @@ void arm_rfft_fast_f32(
 
 #if   defined ( __CC_ARM )
   /* Enter low optimization region - place directly above function definition */
-  #if defined( ARM_MATH_CM4 ) || defined( ARM_MATH_CM7)
+  #if defined( __ARM_ARCH_7EM__ )
     #define LOW_OPTIMIZATION_ENTER \
        _Pragma ("push")         \
        _Pragma ("O1")
@@ -7055,7 +7257,7 @@ void arm_rfft_fast_f32(
   #endif
 
   /* Exit low optimization region - place directly after end of function definition */
-  #if defined ( ARM_MATH_CM4 ) || defined ( ARM_MATH_CM7 )
+  #if defined ( __ARM_ARCH_7EM__ )
     #define LOW_OPTIMIZATION_EXIT \
        _Pragma ("pop")
   #else
@@ -7083,7 +7285,7 @@ void arm_rfft_fast_f32(
 
 #elif defined ( __ICCARM__ )
   /* Enter low optimization region - place directly above function definition */
-  #if defined ( ARM_MATH_CM4 ) || defined ( ARM_MATH_CM7 )
+  #if defined ( __ARM_ARCH_7EM__ )
     #define LOW_OPTIMIZATION_ENTER \
        _Pragma ("optimize=low")
   #else
@@ -7094,7 +7296,7 @@ void arm_rfft_fast_f32(
   #define LOW_OPTIMIZATION_EXIT
 
   /* Enter low optimization region - place directly above function definition */
-  #if defined ( ARM_MATH_CM4 ) || defined ( ARM_MATH_CM7 )
+  #if defined ( __ARM_ARCH_7EM__ )
     #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \
        _Pragma ("optimize=low")
   #else
@@ -7145,6 +7347,8 @@ void arm_rfft_fast_f32(
 
 #elif defined ( __TASKING__ )
 
+#elif defined ( _MSC_VER )
+
 #else
   #error Unknown compiler
 #endif
-- 
cgit