Simd Library Documentation.

Home | Release Notes | Download | Documentation | Issues | GitHub

Functions for accelerating inference of neural networks in Synet Framework. More...

Modules

 Activation functions
 Functions to accelerate activation functions in Synet Framework.
 
 Conversion functions
 Functions to accelerate conversion in Synet Framework.
 
 FusedLayer functions
 Functions to accelerate FusedLayer in Synet Framework.
 
 FP32 convolution framework
 A framework to accelerate FP32 convolution in Synet Framework.
 
 INT8 convolution framework
 A framework to accelerate INT8 convolution in Synet Framework.
 
 FP32 deconvolution framework
 A framework to accelerate FP32 deconvolution in Synet Framework.
 
 FP32 merged convolution frameworks
 A framework to accelerate FP32 merged convolution in Synet Framework.
 
 INT8 merged convolution frameworks
 A framework to accelerate INT8 merged convolution in Synet Framework.
 
 Scale functions
 Functions to accelerate layer scale in Synet Framework.
 
 Winograd functions
 Functions to accelerate Winograd convolution algorithm in Synet Framework.
 

Data Structures

struct  SimdConvolutionParameters
 

Typedefs

typedef void(* SimdGemm32fNNPtr) (size_t M, size_t N, size_t K, const float *alpha, const float *A, size_t lda, const float *B, size_t ldb, const float *beta, float *C, size_t ldc)
 Callback function type "SimdGemm32fNNPtr". More...
 
typedef struct SimdConvolutionParameters SimdConvolutionParameters
 

Enumerations

enum  SimdConvolutionActivationType {
  SimdConvolutionActivationIdentity = 0,
  SimdConvolutionActivationRelu,
  SimdConvolutionActivationLeakyRelu,
  SimdConvolutionActivationRestrictRange,
  SimdConvolutionActivationPrelu,
  SimdConvolutionActivationElu,
  SimdConvolutionActivationHswish
}
 
enum  SimdSynetCompatibilityType {
  SimdSynetCompatibilityFmaUse = 0,
  SimdSynetCompatibilityFmaNoTail = 1,
  SimdSynetCompatibilityFmaAvoid = 2,
  SimdSynetCompatibilityFmaMask = 3,
  SimdSynetCompatibility8iPrecise = 0,
  SimdSynetCompatibility8iOverflow = 4,
  SimdSynetCompatibility8iNarrowed = 8,
  SimdSynetCompatibility8iMask = 12,
  SimdSynetCompatibilityFloatZero = 16
}
 
enum  SimdSynetEltwiseOperationType {
  SimdSynetEltwiseOperationProduct,
  SimdSynetEltwiseOperationSum,
  SimdSynetEltwiseOperationMax,
  SimdSynetEltwiseOperationMin
}
 
enum  SimdSynetUnaryOperation32fType {
  SimdSynetUnaryOperation32fAbs,
  SimdSynetUnaryOperation32fExp,
  SimdSynetUnaryOperation32fLog,
  SimdSynetUnaryOperation32fNeg,
  SimdSynetUnaryOperation32fRsqrt,
  SimdSynetUnaryOperation32fSqrt,
  SimdSynetUnaryOperation32fTanh,
  SimdSynetUnaryOperation32fZero
}
 
enum  SimdTensorFormatType {
  SimdTensorFormatUnknown = -1,
  SimdTensorFormatNchw,
  SimdTensorFormatNhwc,
  SimdTensorFormatNchw4c,
  SimdTensorFormatNchw8c,
  SimdTensorFormatNchw16c,
  SimdTensorFormatNchwXc,
  SimdTensorFormatOiyx,
  SimdTensorFormatYxio,
  SimdTensorFormatOyxi4o,
  SimdTensorFormatOyxi8o,
  SimdTensorFormatOyxi16o,
  SimdTensorFormatOyxiXo
}
 
enum  SimdTensorDataType {
  SimdTensorDataUnknown = -1,
  SimdTensorData32f,
  SimdTensorData32i,
  SimdTensorData8i,
  SimdTensorData8u
}
 

Functions

SIMD_API void SimdSynetAddBias (const float *bias, size_t channels, size_t spatial, float *dst, SimdTensorFormatType format)
 Adds a bias to given vector. More...
 
SIMD_API void SimdSynetAdd8i (const uint8_t *aData, const float *aScale, const float *aShift, const uint8_t *bData, const float *bScale, const float *bShift, uint8_t *cData, const float *cScale, const float *cShift, size_t batch, size_t channels, size_t spatial, SimdTensorFormatType format, SimdSynetCompatibilityType compatibility)
 Adds two INT8 tensors. More...
 
SIMD_API void SimdSynetEltwiseLayerForward (float const *const *src, const float *weight, size_t count, size_t size, SimdSynetEltwiseOperationType type, float *dst)
 This function is used for forward propagation of EltwiseLayer. More...
 
SIMD_API void SimdSynetInnerProductLayerForward (const float *src, const float *weight, const float *bias, size_t count, size_t size, float *dst)
 This function is used for forward propagation of InnerProductLayer. More...
 
SIMD_API void SimdSynetInnerProduct8i (size_t M, size_t N, size_t K, const uint8_t *src, const int8_t *weight, int32_t *dst, SimdSynetCompatibilityType compatibility)
 This function is used for INT8 forward propagation of InnerProductLayer. More...
 
SIMD_API void SimdSynetLrnLayerCrossChannels (const float *src, size_t half, size_t channels, size_t spatial, const float *k, float *dst, SimdTensorFormatType format)
 This function is used for forward propagation of LrnLayer (cross channels normalization). More...
 
SIMD_API void SimdSynetPoolingForwardAverage (const float *src, size_t srcC, size_t srcH, size_t srcW, size_t kernelY, size_t kernelX, size_t strideY, size_t strideX, size_t padY, size_t padX, float *dst, size_t dstH, size_t dstW, SimdBool excludePad, SimdTensorFormatType format)
 This function is used for forward propagation of PoolingLayer (AveragePooling). More...
 
SIMD_API void SimdSynetPoolingForwardMax32f (const float *src, size_t srcC, size_t srcH, size_t srcW, size_t kernelY, size_t kernelX, size_t strideY, size_t strideX, size_t padY, size_t padX, float *dst, size_t dstH, size_t dstW, SimdTensorFormatType format)
 This function is used for forward propagation of PoolingLayer (MaxPooling, 32-bit float). More...
 
SIMD_API void SimdSynetPoolingForwardMax8u (const uint8_t *src, size_t srcC, size_t srcH, size_t srcW, size_t kernelY, size_t kernelX, size_t strideY, size_t strideX, size_t padY, size_t padX, uint8_t *dst, size_t dstH, size_t dstW, SimdTensorFormatType format)
 This function is used for forward propagation of PoolingLayer (MaxPooling, 8-bit unsigned integer). More...
 
SIMD_API void SimdSynetShuffleLayerForward (const float *src0, const float *src1, size_t channels0, size_t channels1, size_t spatial, float *dst0, float *dst1, SimdTensorFormatType format, int type)
 This function is used for forward propagation of ShuffleLayer. More...
 
SIMD_API void SimdSynetSoftmaxLayerForward (const float *src, size_t outer, size_t count, size_t inner, float *dst)
 This function is used for forward propagation of SoftmaxLayer. More...
 
SIMD_API SimdTensorFormatType SimdSynetSpecifyTensorFormat (SimdTensorFormatType format)
 Specifies hardware optimized tensor format of 5D-tensor for (input/output) image or 2D-convolution filter. More...
 
SIMD_API size_t SimdSynetTensorAlignment (SimdTensorFormatType format)
 Gets alignment required for current tensor format. More...
 
SIMD_API void SimdSynetUnaryOperation32fLayerForward (const float *src, size_t size, SimdSynetUnaryOperation32fType type, float *dst)
 This function is used for forward propagation of UnaryOperationLayer. More...
 

Detailed Description

Functions for accelerating inference of neural networks in Synet Framework.

Typedef Documentation

◆ SimdGemm32fNNPtr

typedef void(* SimdGemm32fNNPtr) (size_t M, size_t N, size_t K, const float *alpha, const float *A, size_t lda, const float *B, size_t ldb, const float *beta, float *C, size_t ldc)

Callback function type "SimdGemm32fNNPtr".

The function has to perform general matrix multiplication (for 32-bit float numbers).

C(M, N) = alpha*A(M, K)*B(K, N) + beta*C(M, N);
Parameters
[in]M- a height of A and height of C matrices.
[in]N- a width of B and width of C matrices.
[in]K- a width of A and height of B matrices.
[in]alpha- a pointer to multiplier of the first term.
[in]A- a pointer to input A matrix.
[in]lda- a leading dimension of A matrix.
[in]B- a pointer to input B matrix.
[in]ldb- a leading dimension of B matrix.
[in]beta- a pointer to multiplier of the second term.
[out]C- a pointer to output C matrix.
[in]ldc- a leading dimension of C matrix.

◆ SimdConvolutionParameters

Enumeration Type Documentation

◆ SimdConvolutionActivationType

Describes type of activation function. It is used in SimdSynetConvolution32fInit, SimdSynetConvolution8iInit, SimdSynetDeconvolution32fInit and SimdSynetMergedConvolution32fInit.

Enumerator
SimdConvolutionActivationIdentity 

Identity (activation function is absent).

SimdConvolutionActivationRelu 

ReLU activation function.

dst[i] = Max(0, src[i]);
SimdConvolutionActivationLeakyRelu 

Leaky ReLU activation function. It has one parameter: slope (params[0]).

dst[i] = src[i] > 0 ? src[i] : slope*src[i];
SimdConvolutionActivationRestrictRange 

The activation function restricts range. It has two parameters: lower (params[0]) and upper (params[1]) bound.

dst[i] = Min(Max(lower, src[i]), upper);
SimdConvolutionActivationPrelu 

PReLU (parametric ReLU) activation function. It has m parameters: slopes[m] (m = dstC, n = dstH*dstW).

dst[i*n + j] = src[i*n + j] > 0 ? src[i*n + j] : slopes[i]*src[i*n + j];
SimdConvolutionActivationElu 

ELU activation function. It has one parameter: alpha (params[0]).

dst[i] = src[i] >= 0 ? src[i] : alpha*(Exp(src[i]) - 1);
SimdConvolutionActivationHswish 

H-Swish (https://arxiv.org/pdf/1905.02244.pdf) activation function. It has two parameters: shift (params[0]) and scale (params[1]).

dst[i] = Max(Min(src[i], shift) + shift, 0)*scale*src[i];

◆ SimdSynetCompatibilityType

Describes Synet compatibility flags. This type is used in functions SimdSynetScaleLayerForward, SimdSynetConvert32fTo8u, SimdSynetConvolution8iInit, and SimdSynetMergedConvolution8iInit.

Enumerator
SimdSynetCompatibilityFmaUse 

Fast (No compatibility for fast code).

SimdSynetCompatibilityFmaNoTail 

Do not use FMA instructions at row tail.

SimdSynetCompatibilityFmaAvoid 

Do not use FMA instructions.

SimdSynetCompatibilityFmaMask 

Bit mask of FMA instruction usage options.

SimdSynetCompatibility8iPrecise 

Use precise 8-bit integer multiplication (VNNI, or its 16-bit emulation).

SimdSynetCompatibility8iOverflow 

Allow 16-bit integer overflow.

SimdSynetCompatibility8iNarrowed 

Use a narrowed range (signed: [-90 .. 90], unsigned: [0 .. 180]) to avoid 16-bit integer overflow.

SimdSynetCompatibility8iMask 

Bit mask of options of 8-bit integer multiplication.

SimdSynetCompatibilityFloatZero 

Bit flag of asymmetric 8-bit integer quantization.

◆ SimdSynetEltwiseOperationType

Describes operation type used in function SimdSynetEltwiseLayerForward.

Enumerator
SimdSynetEltwiseOperationProduct 

Product.

SimdSynetEltwiseOperationSum 

Weighted sum.

SimdSynetEltwiseOperationMax 

Maximum.

SimdSynetEltwiseOperationMin 

Minimum.

◆ SimdSynetUnaryOperation32fType

Describes operation type used in function SimdSynetUnaryOperation32fLayerForward.

Enumerator
SimdSynetUnaryOperation32fAbs 

Gets absolute value for every point of input tensor.

SimdSynetUnaryOperation32fExp 

Gets exponent for every point of input tensor.

SimdSynetUnaryOperation32fLog 

Gets logarithm for every point of input tensor.

SimdSynetUnaryOperation32fNeg 

Gets negative for every point of input tensor.

SimdSynetUnaryOperation32fRsqrt 

Gets reciprocal square root for every point of input tensor.

SimdSynetUnaryOperation32fSqrt 

Gets square root for every point of input tensor.

SimdSynetUnaryOperation32fTanh 

Gets hyperbolic tangent for every point of input tensor.

SimdSynetUnaryOperation32fZero 

Gets zero value for every point of input tensor.

◆ SimdTensorFormatType

Describes Synet Framework 4D-tensor format type.

Enumerator
SimdTensorFormatUnknown 

Unknown tensor format.

SimdTensorFormatNchw 

NCHW (N - batch, C - channels, H - height, W - width) 4D-tensor format of (input/output) image.

SimdTensorFormatNhwc 

NHWC (N - batch, H - height, W - width, C - channels) 4D-tensor format of (input/output) image.

SimdTensorFormatNchw4c 

NCHW4c (N - batch, C - (channels + 3) / 4, H - height, W - width, 4c - channels grouped by 4) special 5D-tensor format of (input/output) image optimized for SSE and NEON.

SimdTensorFormatNchw8c 

NCHW8c (N - batch, C - (channels + 7) / 8, H - height, W - width, 8c - channels grouped by 8) special 5D-tensor format of (input/output) image optimized for AVX and AVX2.

SimdTensorFormatNchw16c 

NCHW16c (N - batch, C - (channels + 15) / 16, H - height, W - width, 16c - channels grouped by 16) special 5D-tensor format of (input/output) image optimized for AVX-512.

SimdTensorFormatNchwXc 

Unspecified hardware optimized 5D-tensor format of (input/output) image. Specific format (SimdTensorFormatNchw4c, SimdTensorFormatNchw8c or SimdTensorFormatNchw16c) is determined by function SimdSynetSpecifyTensorFormat.

SimdTensorFormatOiyx 

OIYX (O - output channels, I - input channels, Y - kernel height, X - kernel width) 4D-tensor format of 2D-convolution filter.

SimdTensorFormatYxio 

YXIO (Y - kernel height, X - kernel width, I - input channels, O - output channels) 4D-tensor format of 2D-convolution filter.

SimdTensorFormatOyxi4o 

OYXI4o (O - (output channels + 3)/4, Y - kernel height, X - kernel width, I - input channels, 4o - output channels grouped by 4) special 5D-tensor format of 2D-convolution filter optimized for SSE and NEON.

SimdTensorFormatOyxi8o 

OYXI8o (O - (output channels + 7)/8, Y - kernel height, X - kernel width, I - input channels, 8o - output channels grouped by 8) special 5D-tensor format of 2D-convolution filter optimized for AVX and AVX2.

SimdTensorFormatOyxi16o 

OYXI16o (O - (output channels + 15)/16, Y - kernel height, X - kernel width, I - input channels, 16o - output channels grouped by 16) special 5D-tensor format of 2D-convolution filter optimized for AVX-512.

SimdTensorFormatOyxiXo 

Unspecified hardware optimized 5D-tensor format of 2D-convolution filter. Specific format (SimdTensorFormatOyxi4o, SimdTensorFormatOyxi8o or SimdTensorFormatOyxi16o) is determined by function SimdSynetSpecifyTensorFormat.

◆ SimdTensorDataType

Describes Synet Framework tensor data type.

Enumerator
SimdTensorDataUnknown 

Unknown tensor data type.

SimdTensorData32f 

32-bit floating point.

SimdTensorData32i 

32-bit signed integer.

SimdTensorData8i 

8-bit signed integer.

SimdTensorData8u 

8-bit unsigned integer.

Function Documentation

◆ SimdSynetAddBias()

void SimdSynetAddBias ( const float *  bias,
size_t  channels,
size_t  spatial,
float *  dst,
SimdTensorFormatType  format 
)

Adds a bias to given vector.

Algorithm's details (example for NCHW tensor format):

for(c = 0; c < channels; ++c)
    for(s = 0; s < spatial; ++s)
         dst[c*spatial + s] += bias[c];
Note
This function is used in Synet Framework.
Parameters
[in]bias- a pointer to the 32-bit float array with bias coefficients. The size of the array is SimdAlign (channels, SimdSynetTensorAlignment (format)).
[in]channels- a number of channels in the image tensor.
[in]spatial- a spatial size of image tensor.
[in,out]dst- a pointer to cumulative 32-bit image tensor. The size of the array is SimdAlign (channels, SimdSynetTensorAlignment (format)) * spatial.
[in]format- a format of image tensor.

◆ SimdSynetAdd8i()

void SimdSynetAdd8i ( const uint8_t *  aData,
const float *  aScale,
const float *  aShift,
const uint8_t *  bData,
const float *  bScale,
const float *  bShift,
uint8_t *  cData,
const float *  cScale,
const float *  cShift,
size_t  batch,
size_t  channels,
size_t  spatial,
SimdTensorFormatType  format,
SimdSynetCompatibilityType  compatibility 
)

Adds two INT8 tensors.

Algorithm's details (example for NCHW tensor format):

for(b = 0; b < batch; ++b)
    for(c = 0; c < channels; ++c)
        for(s = 0; s < spatial; ++s)
        {
             offs = (b*channels + c)*spatial + s;
             A = aData[offs]*aScale[c] + aShift[c]; 
             B = bData[offs]*bScale[c] + bShift[c];
             cData[offs] = round((A + B)*cScale[c] + cShift[c]);
        }
Note
This function is used in Synet Framework.
Parameters
[in]aData- a pointer to the first input 8-bit integer tensor.
[in]aScale- a pointer to the 32-bit float array with scale coefficients of the first input tensor.
[in]aShift- a pointer to the 32-bit float array with shift coefficients of the first input tensor.
[in]bData- a pointer to the second input 8-bit integer tensor.
[in]bScale- a pointer to the 32-bit float array with scale coefficients of the second input tensor.
[in]bShift- a pointer to the 32-bit float array with shift coefficients of the second input tensor.
[out]cData- a pointer to the output 8-bit integer tensor.
[in]cScale- a pointer to the 32-bit float array with scale coefficients of the output tensor.
[in]cShift- a pointer to the 32-bit float array with shift coefficients of the output tensor.
[in]batch- a batch size of input and output image tensors.
[in]channels- a number of channels in input and output image tensors.
[in]spatial- a spatial size of input and output image tensors.
[in]format- a format of input and output image tensors.
[in]compatibility- flags of bitwise compatibility.

◆ SimdSynetEltwiseLayerForward()

void SimdSynetEltwiseLayerForward ( float const *const *  src,
const float *  weight,
size_t  count,
size_t  size,
SimdSynetEltwiseOperationType  type,
float *  dst 
)

This function is used for forward propagation of EltwiseLayer.

Algorithm's details for SimdSynetEltwiseOperationProduct:

for(j = 0; j < size; ++j)
    dst[j] = 1;
for(i = 0; i < count; ++i)
    for(j = 0; j < size; ++j)
        dst[j] *= src[i][j];

Algorithm's details for SimdSynetEltwiseOperationSum:

for(j = 0; j < size; ++j)
    dst[j] = 0;
for(i = 0; i < count; ++i)
    for(j = 0; j < size; ++j)
        dst[j] += src[i][j]*weight[i];

Algorithm's details for SimdSynetEltwiseOperationMax:

for(j = 0; j < size; ++j)
    dst[j] = -FLT_MAX;
for(i = 0; i < count; ++i)
    for(j = 0; j < size; ++j)
        dst[j] = Max(dst[j], src[i][j]);

Algorithm's details for SimdSynetEltwiseOperationMin:

for(j = 0; j < size; ++j)
    dst[j] = FLT_MAX;
for(i = 0; i < count; ++i)
    for(j = 0; j < size; ++j)
        dst[j] = Min(dst[j], src[i][j]);
Note
This function is used in Synet Framework.
Parameters
[in]src- a pointer to pointers to the input 32-bit float arrays.
[in]weight- a pointer to the 32-bit float array with sum coefficients. It is needed only for SimdSynetEltwiseOperationSum operation type; otherwise it can be NULL.
[in]count- a count of input arrays. Must be at least 2.
[in]size- a size of the input and output arrays.
[in]type- a type of operation (see SimdSynetEltwiseOperationType).
[out]dst- a pointer to the output 32-bit float array.

◆ SimdSynetInnerProductLayerForward()

void SimdSynetInnerProductLayerForward ( const float *  src,
const float *  weight,
const float *  bias,
size_t  count,
size_t  size,
float *  dst 
)

This function is used for forward propagation of InnerProductLayer.

Algorithm's details:

for(i = 0; i < count; ++i)
{
    dst[i] = (bias ? bias[i] : 0);
    for(j = 0; j < size; ++j)
       dst[i] += src[j]*weight[i*size + j];
}
Note
This function is used in Synet Framework.
Parameters
[in]src- a pointer to the input 32-bit float array. The size of the array must be equal to size.
[in]weight- a pointer to the 32-bit float array with weight coefficients. The size of the array must be equal to count*size.
[in]bias- a pointer to the 32-bit float array with bias coefficients. The size of the array must be equal to count. Can be NULL.
[in]count- a size of output array.
[in]size- a size of input array.
[out]dst- a pointer to the output 32-bit float array. The size of the array must be equal to count.

◆ SimdSynetInnerProduct8i()

void SimdSynetInnerProduct8i ( size_t  M,
size_t  N,
size_t  K,
const uint8_t *  src,
const int8_t *  weight,
int32_t *  dst,
SimdSynetCompatibilityType  compatibility 
)

This function is used for INT8 forward propagation of InnerProductLayer.

Algorithm's details:

for (i = 0; i < M; ++i)
{
    for (j = 0; j < N; ++j)
    {
        sum = 0;
        for (k = 0; k < K; ++k)
            sum += src[i * K + k] * weight[j * K + k];
        dst[i*N + j] = sum;
    }
}
Note
This function is used in Synet Framework.
Parameters
[in]M- a batch size.
[in]N- an output size.
[in]K- an input size.
[in]src- a pointer to the input 8-bit unsigned integer array. The size of the array must be equal to M*K.
[in]weight- a pointer to the 8-bit signed integer array with weight. The size of the array must be equal to N*K.
[out]dst- a pointer to the output 32-bit integer array. The size of the array must be equal to M*N.
[in]compatibility- flags of bitwise compatibility.

◆ SimdSynetLrnLayerCrossChannels()

void SimdSynetLrnLayerCrossChannels ( const float *  src,
size_t  half,
size_t  channels,
size_t  spatial,
const float *  k,
float *  dst,
SimdTensorFormatType  format 
)

This function is used for forward propagation of LrnLayer (cross channels normalization).

Algorithm's details (example for NCHW tensor format):

for(c = 0; c < channels; ++c)
    for(s = 0; s < spatial; ++s)
    {
        lo = Max(0, c - half);
        hi = Min(channels, c + half + 1);
        sum = 0;
        for(i = lo; i < hi; ++i)
            sum += Square(src[i*spatial + s]);
        dst[c*spatial + s] = src[c*spatial + s]*Pow(k[0] + sum*k[1], k[2]);
    }
Note
This function is used in Synet Framework.
Parameters
[in]src- a pointer to the 32-bit float array with input image tensor. The size of the array is SimdAlign (channels, SimdSynetTensorAlignment (format)) * spatial.
[in]half- a local normalization half size.
[in]channels- a number of channels in the (input/output) image tensor.
[in]spatial- a spatial size of (input/output) image tensor.
[in]k- a pointer to the 32-bit float array with 3 coefficients (see algorithm details).
[out]dst- a pointer to the 32-bit float array with output image tensor. The size of the array is SimdAlign (channels, SimdSynetTensorAlignment (format)) * spatial.
[in]format- a format of (input/output) image tensor.

◆ SimdSynetPoolingForwardAverage()

void SimdSynetPoolingForwardAverage ( const float *  src,
size_t  srcC,
size_t  srcH,
size_t  srcW,
size_t  kernelY,
size_t  kernelX,
size_t  strideY,
size_t  strideX,
size_t  padY,
size_t  padX,
float *  dst,
size_t  dstH,
size_t  dstW,
SimdBool  excludePad,
SimdTensorFormatType  format 
)

This function is used for forward propagation of PoolingLayer (AveragePooling).

Note
This function is used in Synet Framework.
Parameters
[in]src- a pointer to the input 32-bit float array. The size of the array must be equal to srcC*srcH*srcW.
[in]srcC- a number of input and output channels.
[in]srcH- an input height.
[in]srcW- an input width.
[in]kernelY- a height of the pooling kernel.
[in]kernelX- a width of the pooling kernel.
[in]strideY- a y-stride of the pooling.
[in]strideX- a x-stride of the pooling.
[in]padY- a pad to the top of the input image.
[in]padX- a pad to the left of the input image.
[out]dst- a pointer to the output 32-bit float array. The size of the array must be equal to srcC*dstH*dstW.
[in]dstH- an output height.
[in]dstW- an output width.
[in]excludePad- a flag to exclude padding from average value calculation.
[in]format- a format of (input/output) image tensor.

◆ SimdSynetPoolingForwardMax32f()

void SimdSynetPoolingForwardMax32f ( const float *  src,
size_t  srcC,
size_t  srcH,
size_t  srcW,
size_t  kernelY,
size_t  kernelX,
size_t  strideY,
size_t  strideX,
size_t  padY,
size_t  padX,
float *  dst,
size_t  dstH,
size_t  dstW,
SimdTensorFormatType  format 
)

This function is used for forward propagation of PoolingLayer (MaxPooling, 32-bit float).

Note
This function is used in Synet Framework.
Parameters
[in]src- a pointer to the input 32-bit float array. The size of the array must be equal to srcC*srcH*srcW.
[in]srcC- a number of input and output channels.
[in]srcH- an input height.
[in]srcW- an input width.
[in]kernelY- a height of the pooling kernel.
[in]kernelX- a width of the pooling kernel.
[in]strideY- a y-stride of the pooling.
[in]strideX- a x-stride of the pooling.
[in]padY- a pad to the top of the input image.
[in]padX- a pad to the left of the input image.
[out]dst- a pointer to the output 32-bit float array. The size of the array must be equal to srcC*dstH*dstW.
[in]dstH- an output height.
[in]dstW- an output width.
[in]format- a format of (input/output) image tensor.

◆ SimdSynetPoolingForwardMax8u()

void SimdSynetPoolingForwardMax8u ( const uint8_t *  src,
size_t  srcC,
size_t  srcH,
size_t  srcW,
size_t  kernelY,
size_t  kernelX,
size_t  strideY,
size_t  strideX,
size_t  padY,
size_t  padX,
uint8_t *  dst,
size_t  dstH,
size_t  dstW,
SimdTensorFormatType  format 
)

This function is used for forward propagation of PoolingLayer (MaxPooling, 8-bit unsigned integer).

Note
This function is used in Synet Framework.
Parameters
[in]src- a pointer to the input 8-bit unsigned integer array. The size of the array must be equal to srcC*srcH*srcW.
[in]srcC- a number of input and output channels.
[in]srcH- an input height.
[in]srcW- an input width.
[in]kernelY- a height of the pooling kernel.
[in]kernelX- a width of the pooling kernel.
[in]strideY- a y-stride of the pooling.
[in]strideX- a x-stride of the pooling.
[in]padY- a pad to the top of the input image.
[in]padX- a pad to the left of the input image.
[out]dst- a pointer to the output 8-bit unsigned integer array. The size of the array must be equal to srcC*dstH*dstW.
[in]dstH- an output height.
[in]dstW- an output width.
[in]format- a format of (input/output) image tensor.

◆ SimdSynetShuffleLayerForward()

void SimdSynetShuffleLayerForward ( const float *  src0,
const float *  src1,
size_t  channels0,
size_t  channels1,
size_t  spatial,
float *  dst0,
float *  dst1,
SimdTensorFormatType  format,
int  type 
)

This function is used for forward propagation of ShuffleLayer.

Note
This function is used in Synet Framework.
Parameters
[in]src0- a pointer to the 32-bit float array with the first input image tensor.
[in]src1- a pointer to the 32-bit float array with the second input image tensor.
[in]channels0- a number of channels in the first input (type == 0) or output (type == 1) image tensor. It must be even number.
[in]channels1- a number of channels in the second input (type == 0) or output (type == 1) image tensor. It must be even number.
[in]spatial- a spatial size of (input/output) image tensors.
[out]dst0- a pointer to the 32-bit float array with the first output image tensor.
[out]dst1- a pointer to the 32-bit float array with the second output image tensor.
[in]format- a format of (input/output) image tensors.
[in]type- a shuffle type (it can be 0 or 1).

◆ SimdSynetSoftmaxLayerForward()

void SimdSynetSoftmaxLayerForward ( const float *  src,
size_t  outer,
size_t  count,
size_t  inner,
float *  dst 
)

This function is used for forward propagation of SoftmaxLayer.

Note
This function is used in Synet Framework.
Parameters
[in]src- a pointer to the input 32-bit float array. The size of the array must be equal to outer*count*inner.
[in]outer- an outer size of input and output arrays.
[in]count- a size of softmax dimension.
[in]inner- an inner size of input and output arrays.
[out]dst- a pointer to the output 32-bit float array. The size of the array must be equal to outer*count*inner.

◆ SimdSynetSpecifyTensorFormat()

SimdTensorFormatType SimdSynetSpecifyTensorFormat ( SimdTensorFormatType  format)

Specifies hardware optimized tensor format of 5D-tensor for (input/output) image or 2D-convolution filter.

Note
This function is used in Synet Framework.
Parameters
[in]format- an unspecified hardware optimized 5D-tensor format of (input/output) image or 2D-convolution filter. It can be SimdTensorFormatNchwXc or SimdTensorFormatOyxiXo.
Returns
specified hardware optimized 5D-tensor format.

◆ SimdSynetTensorAlignment()

size_t SimdSynetTensorAlignment ( SimdTensorFormatType  format)

Gets alignment required for current tensor format.

Note
This function is used in Synet Framework.
Parameters
[in]format- a tensor format.
Returns
alignment required for current tensor format.

◆ SimdSynetUnaryOperation32fLayerForward()

void SimdSynetUnaryOperation32fLayerForward ( const float *  src,
size_t  size,
SimdSynetUnaryOperation32fType  type,
float *  dst 
)

This function is used for forward propagation of UnaryOperationLayer.

Note
This function is used in Synet Framework.
Parameters
[in]src- a pointer to the input 32-bit float array.
[in]size- a size of the input and output arrays.
[in]type- a unary operation type (see SimdSynetUnaryOperation32fType).
[out]dst- a pointer to the output 32-bit float array.