|
|
@@ -3,229 +3,787 @@
|
|
|
"name": "Conv",
|
|
|
"schema": {
|
|
|
"category": "Layer",
|
|
|
+ "description": "\nThe convolution operator consumes an input vector, a filter blob\nand a bias blob and computes the output. \nNote that other parameters, such as the stride and\nkernel size, or the pads' sizes in each direction are not necessary for input\nbecause they are provided by the ConvPoolOpBase operator. Various dimension\nchecks are done implicitly, and the sizes are specified in the Input docs for\nthis operator. As is expected, the filter is convolved with a subset of the\nimage and the bias is added; this is done throughout the image data and the\noutput is computed. As a side note on the implementation layout:\nconv_op_impl.h is the templated implementation of the conv_op.h file, which is\nwhy they are separate files.\n",
|
|
|
"inputs": [
|
|
|
- { "name": "X" },
|
|
|
- { "name": "filter" },
|
|
|
- { "name": "bias" }
|
|
|
+ {
|
|
|
+ "description": "Input data blob from previous layer; has size (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and width. Note that this is for the NCHW usage. On the other hand, the NHWC Op has a different set of dimension constraints. ",
|
|
|
+ "name": "X"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The filter blob that will be used in the convolutions; has size (M x C x kH x kW), where C is the number of channels, and kH and kW are the height and width of the kernel.",
|
|
|
+ "name": "filter"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The 1D bias blob that is added through the convolution; has size (M).",
|
|
|
+ "name": "bias"
|
|
|
+ }
|
|
|
],
|
|
|
"outputs": [
|
|
|
- { "name": "Y" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "description": "Output data blob that contains the result of the convolution. The output dimensions are functions of the kernel size, stride size, and pad lengths.",
|
|
|
+ "name": "Y"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "ConvTranspose",
|
|
|
"schema": {
|
|
|
"category": "Layer",
|
|
|
+ "description": "\nThe transposed convolution consumes an input vector, the filter blob, and\nthe bias blob, and computes the output. Note that other parameters, such as\nthe stride and kernel size, or the pads' sizes in each direction are not\nnecessary for input because they are provided by the\nConvTransposeUnpoolOpBase operator. Various dimension checks are done\nimplicitly, and the sizes are specified in the Input docs for this operator.\nAs is expected, the filter is deconvolved with a subset of the\nimage and the bias is added; this is done throughout the image data and the\noutput is computed. As a side note on the implementation layout:\nconv_transpose_op_impl.h is the templated implementation of the\nconv_transpose_op.h file, which is why they are separate files.\n ",
|
|
|
"inputs": [
|
|
|
- { "name": "X" },
|
|
|
- { "name": "filter" },
|
|
|
- { "name": "bias" }
|
|
|
+ {
|
|
|
+ "description": "Input data blob from previous layer; has size (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and width. Note that this is for the NCHW usage. On the other hand, the NHWC Op has a different set of dimension constraints.",
|
|
|
+ "name": "X"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The filter blob that will be used in the transposed convolution; has size (M x C x kH x kW), where C is the number of channels, and kH and kW are the height and width of the kernel.",
|
|
|
+ "name": "filter"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The 1D bias blob that is added through the convolution;has size (C). Optional, if not passed, will treat it as all 0.",
|
|
|
+ "name": "bias"
|
|
|
+ }
|
|
|
],
|
|
|
"outputs": [
|
|
|
- { "name": "Y" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "description": "Output data blob that contains the result of the transposed convolution. The output dimensions are functions of the kernel size, stride size, and pad lengths.",
|
|
|
+ "name": "Y"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "FC",
|
|
|
"schema": {
|
|
|
+ "attributes": [
|
|
|
+ {
|
|
|
+ "description": "(int32_t) default to 1; describes the axis of the inputs; defaults to one because the 0th axis most likely describes the batch_size",
|
|
|
+ "name": "axis",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(int32_t) default to 1; describes the axis of the weight matrix W; defaults to one because the 0th axis most likely describes the batch_size",
|
|
|
+ "name": "axis_w",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Whether to use float-16 compute kernel",
|
|
|
+ "name": "float16_compute",
|
|
|
+ "option": "optional"
|
|
|
+ }
|
|
|
+ ],
|
|
|
"category": "Layer",
|
|
|
+ "description": "\nComputes the result of passing an input vector X into a fully\nconnected layer with 2D weight matrix W and 1D bias vector b. That is,\nthe layer computes Y = X * W^T + b, where X has size (M x K),\nW has size (N x K), b has size (N), and Y has size (M x N),\nwhere M is often the batch size.\n\n\nNOTE: X does not need to explicitly be a 2D vector; rather, it will be\ncoerced into one. For an arbitrary n-dimensional tensor\nX \\in [a_0, a_1, ...,a_{k-1}, a_k, ..., a_{n-1}] where a_i \\in N+ and k is\nthe axis provided, then X will be coerced into a 2-dimensional tensor with\ndimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1}]. For the default\ncase where axis=1, this means the X tensor will be coerced into a 2D tensor\nof dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size.\nIn this situation, we must have a_0 = M and a_1 * ... * a_{n-1} = K.\nLastly, even though b is a 1D vector of size N, it is copied/resized to\nbe size (M x N) implicitly and added to each vector in the batch.\nEach of these dimensions must be matched correctly, or else the operator\nwill throw errors.\n",
|
|
|
"inputs": [
|
|
|
- { "name": "X" },
|
|
|
- { "name": "W" },
|
|
|
- { "name": "b" }
|
|
|
+ {
|
|
|
+ "description": "input tensor that's coerced into a 2D matrix of size (MxK) as described above",
|
|
|
+ "name": "X"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "A tensor that is coerced into a 2D blob of size (KxN) containing fully connected weight matrix",
|
|
|
+ "name": "W"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "1D blob containing bias vector",
|
|
|
+ "name": "b"
|
|
|
+ }
|
|
|
],
|
|
|
"outputs": [
|
|
|
- { "name": "Y" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "description": "2D output tensor",
|
|
|
+ "name": "Y"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "Add",
|
|
|
"schema": {
|
|
|
+ "attributes": [
|
|
|
+ {
|
|
|
+ "description": "Pass 1 to enable broadcasting",
|
|
|
+ "name": "broadcast",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "If set, defines the broadcast dimensions. See doc for details.",
|
|
|
+ "name": "axis",
|
|
|
+ "option": "optional"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "description": "\nPerforms element-wise binary addition (with limited broadcast support).\n\nIf necessary the right-hand-side argument will be broadcasted to match the\nshape of left-hand-side argument. When broadcasting is specified, the second\ntensor can either be of size 1 (a scalar value), or having its shape as a\ncontiguous subset of the first tensor's shape. The starting of the mutually\nequal shape is specified by the argument \"axis\", and if it is not set, suffix\nmatching is assumed. 1-dim expansion doesn't work yet.\n\nFor example, the following tensor shapes are supported (with broadcast=1):\n\n shape(A) = (2, 3, 4, 5), shape(B) = (,), i.e. B is a scalar\n shape(A) = (2, 3, 4, 5), shape(B) = (5,)\n shape(A) = (2, 3, 4, 5), shape(B) = (4, 5)\n shape(A) = (2, 3, 4, 5), shape(B) = (3, 4), with axis=1\n shape(A) = (2, 3, 4, 5), shape(B) = (2), with axis=0\n\nArgument `broadcast=1` needs to be passed to enable broadcasting.\n",
|
|
|
"inputs": [
|
|
|
- { "name": "A" },
|
|
|
- { "name": "B" }
|
|
|
+ {
|
|
|
+ "description": "First operand, should share the type with the second operand.",
|
|
|
+ "name": "A"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Second operand. With broadcasting can be of smaller size than A. If broadcasting is disabled it should be of the same size.",
|
|
|
+ "name": "B"
|
|
|
+ }
|
|
|
],
|
|
|
"outputs": [
|
|
|
- { "name": "C" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "description": "Result, has same dimensions and type as A",
|
|
|
+ "name": "C"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "Sum",
|
|
|
"schema": {
|
|
|
+ "description": "\nElement-wise sum of each of the input tensors. The first input tensor can be\nused in-place as the output tensor, in which case the sum will be done in\nplace and results will be accumulated in input0. All inputs and outputs must\nhave the same shape and data type.\n",
|
|
|
"inputs": [
|
|
|
- { "name": "A" },
|
|
|
- { "name": "B" }
|
|
|
+ {
|
|
|
+ "name": "A"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "name": "B"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "First of the input tensors. Can be inplace.",
|
|
|
+ "name": "data_0"
|
|
|
+ }
|
|
|
],
|
|
|
"outputs": [
|
|
|
- { "name": "C" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "name": "C"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Output tensor. Same dimension as inputs.",
|
|
|
+ "name": "sum"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "Mul",
|
|
|
"schema": {
|
|
|
+ "attributes": [
|
|
|
+ {
|
|
|
+ "description": "Pass 1 to enable broadcasting",
|
|
|
+ "name": "broadcast",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "If set, defines the broadcast dimensions. See doc for details.",
|
|
|
+ "name": "axis",
|
|
|
+ "option": "optional"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "description": "\nPerforms element-wise binary multiplication (with limited broadcast support).\n\nIf necessary the right-hand-side argument will be broadcasted to match the\nshape of left-hand-side argument. When broadcasting is specified, the second\ntensor can either be of size 1 (a scalar value), or having its shape as a\ncontiguous subset of the first tensor's shape. The starting of the mutually\nequal shape is specified by the argument \"axis\", and if it is not set, suffix\nmatching is assumed. 1-dim expansion doesn't work yet.\n\nFor example, the following tensor shapes are supported (with broadcast=1):\n\n shape(A) = (2, 3, 4, 5), shape(B) = (,), i.e. B is a scalar\n shape(A) = (2, 3, 4, 5), shape(B) = (5,)\n shape(A) = (2, 3, 4, 5), shape(B) = (4, 5)\n shape(A) = (2, 3, 4, 5), shape(B) = (3, 4), with axis=1\n shape(A) = (2, 3, 4, 5), shape(B) = (2), with axis=0\n\nArgument `broadcast=1` needs to be passed to enable broadcasting.\n",
|
|
|
"inputs": [
|
|
|
- { "name": "A" },
|
|
|
- { "name": "B" }
|
|
|
+ {
|
|
|
+ "description": "First operand, should share the type with the second operand.",
|
|
|
+ "name": "A"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Second operand. With broadcasting can be of smaller size than A. If broadcasting is disabled it should be of the same size.",
|
|
|
+ "name": "B"
|
|
|
+ }
|
|
|
],
|
|
|
"outputs": [
|
|
|
- { "name": "C" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "description": "Result, has same dimensions and type as A",
|
|
|
+ "name": "C"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "MatMul",
|
|
|
"schema": {
|
|
|
+ "attributes": [
|
|
|
+ {
|
|
|
+ "description": "Exclusive axis that divides the first and second dimension of matrix A, default to 1",
|
|
|
+ "name": "axis_a",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Exclusive axis that divides the first and second dimension of matrix B, default to 1",
|
|
|
+ "name": "axis_b",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Pass 1 to transpose A before multiplication and after the dimension adjustment using axis_a",
|
|
|
+ "name": "trans_a",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Pass 1 to transpose B before multiplication and after the dimension adjustment using axis_b",
|
|
|
+ "name": "trans_b",
|
|
|
+ "option": "optional"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "description": "\nMatrix multiplication Y = A * B, where A has size (M x K), B has size (K x N),\nand Y will have a size (M x N).\n",
|
|
|
"inputs": [
|
|
|
- { "name": "A" },
|
|
|
- { "name": "B" }
|
|
|
+ {
|
|
|
+ "description": "2D matrix of size (M x K)",
|
|
|
+ "name": "A"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "2D matrix of size (K x N)",
|
|
|
+ "name": "B"
|
|
|
+ }
|
|
|
],
|
|
|
"outputs": [
|
|
|
- { "name": "Y" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "description": "2D matrix of size (M x N)",
|
|
|
+ "name": "Y"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "Relu",
|
|
|
"schema": {
|
|
|
- "category": "Activation"
|
|
|
+ "category": "Activation",
|
|
|
+ "description": "\nRelu takes one input data (Tensor<T>) and produces one output data\n(Tensor<T>) where the rectified linear function, y = max(0, x), is applied to\nthe tensor elementwise.\n",
|
|
|
+ "inputs": [
|
|
|
+ {
|
|
|
+ "description": "1D input tensor",
|
|
|
+ "name": "X"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "description": "1D input tensor",
|
|
|
+ "name": "Y"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "Sigmoid",
|
|
|
"schema": {
|
|
|
- "category": "Activation"
|
|
|
+ "category": "Activation",
|
|
|
+ "description": "\nSigmoid takes one input data (Tensor<T>) and produces one output data\n(Tensor<T>) where the sigmoid function, y = 1 / (1 + exp(-x)), is applied to the\ntensor elementwise.\n",
|
|
|
+ "inputs": [
|
|
|
+ {
|
|
|
+ "description": "1D input tensor",
|
|
|
+ "name": "X"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "description": "1D output tensor",
|
|
|
+ "name": "Y"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "PRelu",
|
|
|
"schema": {
|
|
|
"category": "Activation",
|
|
|
+ "description": "\n\nPRelu takes input data (Tensor<T>) and slope tensor as input, and produces one\noutput data (Tensor<T>) where the function `f(x) = slope * x for x < 0`,\n`f(x) = x for x >= 0`., is applied to the data tensor elementwise.\n\n",
|
|
|
"inputs": [
|
|
|
- { "name": "X" },
|
|
|
- { "name": "Slope" }
|
|
|
+ {
|
|
|
+ "description": "1D input tensor",
|
|
|
+ "name": "X"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "1D slope tensor. If `Slope` is of size 1, the value is sharedacross different channels",
|
|
|
+ "name": "Slope"
|
|
|
+ }
|
|
|
],
|
|
|
"outputs": [
|
|
|
- { "name": "Y" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "description": "1D input tensor",
|
|
|
+ "name": "Y"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "Softmax",
|
|
|
"schema": {
|
|
|
- "category": "Activation"
|
|
|
+ "attributes": [
|
|
|
+ {
|
|
|
+ "description": "(int) default to 1; describes the axis of the inputs when coerced to 2D; defaults to one because the 0th axis most likely describes the batch_size",
|
|
|
+ "name": "axis",
|
|
|
+ "option": "optional"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "category": "Activation",
|
|
|
+ "description": "\nThe operator computes the softmax normalized values for each layer in the batch\n of the given input. The input is a 2-D tensor (Tensor<float>) of size\n(batch_size x input_feature_dimensions). The output tensor has the same shape\nand contains the softmax normalized values of the corresponding input.\n\nX does not need to explicitly be a 2D vector; rather, it will be\ncoerced into one. For an arbitrary n-dimensional tensor\nX \\in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}] and k is\nthe axis provided, then X will be coerced into a 2-dimensional tensor with\ndimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1}]. For the default\ncase where axis=1, this means the X tensor will be coerced into a 2D tensor\nof dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size.\nIn this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D.\nEach of these dimensions must be matched correctly, or else the operator\nwill throw errors.\n",
|
|
|
+ "inputs": [
|
|
|
+ {
|
|
|
+ "description": "The input tensor that's coerced into a 2D matrix of size (NxD) as described above.",
|
|
|
+ "name": "input"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "description": "The softmax normalized output values with the same shape as input tensor.",
|
|
|
+ "name": "output"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "MaxPool",
|
|
|
"schema": {
|
|
|
- "category": "Pool"
|
|
|
+ "category": "Pool",
|
|
|
+ "description": "MaxPool \nconsumes an input blob X and applies max pooling across the\nthe blob according to kernel sizes, stride sizes, and pad lengths defined by the\nConvPoolOpBase operator. Max pooling consisting of taking the maximum value of a\nsubset of the input tensor according to the kernel size and downsampling the\ndata into the output blob Y for further processing.\n",
|
|
|
+ "inputs": [
|
|
|
+ {
|
|
|
+ "description": "Input data tensor from the previous operator; dimensions depend on whether the NCHW or NHWC operators are being used. For example, in the former, the input has size (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and the width of the data. The corresponding permutation of dimensions is used in the latter case.",
|
|
|
+ "name": "X"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "description": "Output data tensor from max pooling across the input tensor. Dimensions will vary based on various kernel, stride, and pad sizes.",
|
|
|
+ "name": "Y"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "AveragePool",
|
|
|
"schema": {
|
|
|
- "category": "Pool"
|
|
|
+ "category": "Pool",
|
|
|
+ "description": "AveragePool \nconsumes an input blob X and applies average pooling across the\nthe blob according to kernel sizes, stride sizes, and pad lengths defined by the\nConvPoolOpBase operator. Average pooling consisting of averaging all values of a\nsubset of the input tensor according to the kernel size and downsampling the\ndata into the output blob Y for further processing.\n",
|
|
|
+ "inputs": [
|
|
|
+ {
|
|
|
+ "description": "Input data tensor from the previous operator; dimensions depend on whether the NCHW or NHWC operators are being used. For example, in the former, the input has size (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and the width of the data. The corresponding permutation of dimensions is used in the latter case.",
|
|
|
+ "name": "X"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "description": "Output data tensor from average pooling across the input tensor. Dimensions will vary based on various kernel, stride, and pad sizes.",
|
|
|
+ "name": "Y"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "SpatialBN",
|
|
|
"schema": {
|
|
|
+ "attributes": [
|
|
|
+ {
|
|
|
+ "description": "If set to nonzero, run spatial batch normalization in test mode.",
|
|
|
+ "name": "is_test"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The epsilon value to use to avoid division by zero.",
|
|
|
+ "name": "epsilon",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "A StorageOrder string.",
|
|
|
+ "name": "order",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Factor used in computing the running mean and variance.e.g., running_mean = running_mean * momentum + mean * (1 - momentum)",
|
|
|
+ "name": "momentum",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(Optional) Specifies the number of batches to apply normalization on. Requires specifying the optional sums and sumsq inputs that provide statistics across multiple batches from which mean and variance can be determined.",
|
|
|
+ "name": "num_batches",
|
|
|
+ "option": "optional"
|
|
|
+ }
|
|
|
+ ],
|
|
|
"category": "Normalization",
|
|
|
+ "description": "\nCarries out spatial batch normalization as described in the paper\nhttps://arxiv.org/abs/1502.03167 . Depending on the mode it is being run,\nthere are multiple cases for the number of outputs, which we list below:\n\n\nOutput case #1:\n Y, mean, var, saved_mean, saved_var (training mode)\n\n\nOutput case #2:\n Y (test mode)\n",
|
|
|
"inputs": [
|
|
|
- { "name": "input" },
|
|
|
- { "name": "scale" },
|
|
|
- { "name": "bias" },
|
|
|
- { "name": "mean" },
|
|
|
- { "name": "var" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "name": "input"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The scale as a 1-dimensional tensor of size C to be applied to the output.",
|
|
|
+ "name": "scale"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The bias as a 1-dimensional tensor of size C to be applied to the output.",
|
|
|
+ "name": "bias"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The running mean (training) or the estimated mean (testing) as a 1-dimensional tensor of size C.",
|
|
|
+ "name": "mean"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The running variance (training) or the estimated variance (testing) as a 1-dimensional tensor of size C.",
|
|
|
+ "name": "var"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The input 4-dimensional tensor of shape NCHW or NHWC depending on the order parameter.",
|
|
|
+ "name": "X"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(optional) Per-channel sums of elements to be used to determine the mean and variance for this batch",
|
|
|
+ "name": "sums"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(optional) Per-channel sum of elements squared per channel to be used to determine the variance for this batch",
|
|
|
+ "name": "sumsq"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "description": "The output 4-dimensional tensor of the same shape as X.",
|
|
|
+ "name": "Y"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The running mean after the spatial BN operator. Must be in-place with the input mean. Should not be used for testing.",
|
|
|
+ "name": "mean"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The running variance after the spatial BN operator. Must be in-place with the input var. Should not be used for testing.",
|
|
|
+ "name": "var"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Saved mean used during training to speed up gradient computation. Should not be used for testing.",
|
|
|
+ "name": "saved_mean"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Saved variance used during training to speed up gradient computation. Should not be used for testing.",
|
|
|
+ "name": "saved_var"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "LRN",
|
|
|
"schema": {
|
|
|
- "category": "Normalization"
|
|
|
+ "category": "Normalization",
|
|
|
+ "description": null,
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "Dropout",
|
|
|
"schema": {
|
|
|
- "category": "Dropout"
|
|
|
+ "attributes": [
|
|
|
+ {
|
|
|
+ "description": "(float, default 0.5) the ratio of random dropout",
|
|
|
+ "name": "ratio",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(int) if nonzero, run dropout in test mode where the output is simply Y = X.",
|
|
|
+ "name": "is_test"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "category": "Dropout",
|
|
|
+ "description": "\nDropout takes one input data (Tensor<float>) and produces two Tensor outputs,\noutput (Tensor<float>) and mask (Tensor<bool>). Depending on whether it is in\ntest mode or not, the output Y will either be a random dropout, or a simple\ncopy of the input. Note that our implementation of Dropout does scaling in\nthe training phase, so during testing nothing needs to be done.\n",
|
|
|
+ "inputs": [
|
|
|
+ {
|
|
|
+ "description": "The input data as Tensor.",
|
|
|
+ "name": "data"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "description": "The output.",
|
|
|
+ "name": "output"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The output mask. If is_test is nonzero, this output is not filled.",
|
|
|
+ "name": "mask"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "Concat",
|
|
|
"schema": {
|
|
|
+ "attributes": [
|
|
|
+ {
|
|
|
+ "description": "Which axis to concat on",
|
|
|
+ "name": "axis",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Either NHWC or NCHW, will concat on C axis, defaults to NCHW",
|
|
|
+ "name": "order",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Pass 1 to add the axis specified in arg 'axis' to all input tensors",
|
|
|
+ "name": "add_axis",
|
|
|
+ "option": "optional"
|
|
|
+ }
|
|
|
+ ],
|
|
|
"category": "Tensor",
|
|
|
+ "description": "Concatenate a list of tensors into a single tensor",
|
|
|
"inputs": [
|
|
|
- { "name": "inputs", "option": "variadic" }
|
|
|
- ],
|
|
|
+ {
|
|
|
+ "name": "inputs",
|
|
|
+ "option": "variadic"
|
|
|
+ }
|
|
|
+ ],
|
|
|
"outputs": [
|
|
|
- { "name": "concat_result" },
|
|
|
- { "name": "split_info" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "description": "Concatenated tensor",
|
|
|
+ "name": "concat_result"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "The dimensions of the inputs.",
|
|
|
+ "name": "split_info"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "GenerateProposals",
|
|
|
"schema": {
|
|
|
+ "attributes": [
|
|
|
+ {
|
|
|
+ "description": "(float) spatial scale",
|
|
|
+ "name": "spatial_scale",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(int) RPN_PRE_NMS_TOP_N",
|
|
|
+ "name": "pre_nms_topN",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(int) RPN_POST_NMS_TOP_N",
|
|
|
+ "name": "post_nms_topN",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(float) RPN_NMS_THRESH",
|
|
|
+ "name": "nms_thresh",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(float) RPN_MIN_SIZE",
|
|
|
+ "name": "min_size",
|
|
|
+ "option": "optional"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "description": "\nGenerate bounding box proposals for Faster RCNN. The propoasls are generated for\na list of images based on image score 'score', bounding box regression result\n'deltas' as well as predefined bounding box shapes 'anchors'. Greedy\nnon-maximum suppression is applied to generate the final bounding boxes.\n",
|
|
|
"inputs": [
|
|
|
- { "name": "scores" },
|
|
|
- { "name": "bbox_deltas" },
|
|
|
- { "name": "im_info" },
|
|
|
- { "name": "anchors" }
|
|
|
+ {
|
|
|
+ "description": "Scores from conv layer, size (img_count, A, H, W)",
|
|
|
+ "name": "scores"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Bounding box deltas from conv layer, size (img_count, 4 * A, H, W)",
|
|
|
+ "name": "bbox_deltas"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Image info, size (img_count, 3), format (height, width, scale)",
|
|
|
+ "name": "im_info"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Bounding box anchors, size (A, 4)",
|
|
|
+ "name": "anchors"
|
|
|
+ }
|
|
|
],
|
|
|
"outputs": [
|
|
|
- { "name": "rois" },
|
|
|
- { "name": "rois_probs" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "description": "Proposals, size (n x 5), format (image_index, x1, y1, x2, y2)",
|
|
|
+ "name": "rois"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "scores of proposals, size (n)",
|
|
|
+ "name": "rois_probs"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "RoIAlign",
|
|
|
"schema": {
|
|
|
+ "attributes": [
|
|
|
+ {
|
|
|
+ "description": "(float) default 1.0; Spatial scale of the input feature map X relative to the input image. E.g., 0.0625 if X has a stride of 16 w.r.t. the input image.",
|
|
|
+ "name": "spatial_scale",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(int) default 1; Pooled output Y's height.",
|
|
|
+ "name": "pooled_h",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(int) default 1; Pooled output Y's width.",
|
|
|
+ "name": "pooled_w",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(int) default -1; number of sampling points in the interpolation grid used to compute the output value of each pooled output bin. If > 0, then exactly sampling_ratio x sampling_ratio grid points are used. If <= 0, then an adaptive number of grid points are used (computed as ceil(roi_width / pooled_w), and likewise for height).",
|
|
|
+ "name": "sampling_ratio",
|
|
|
+ "option": "optional"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "description": "\nRegion of Interest (RoI) align operation as used in Mask R-CNN.\n",
|
|
|
"inputs": [
|
|
|
- { "name": "X" },
|
|
|
- { "name": "RoIs" }
|
|
|
+ {
|
|
|
+ "description": "4D feature map input of shape (N, C, H, W).",
|
|
|
+ "name": "X"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "2D input of shape (R, 4 or 5) specifying R RoIs representing: batch index in [0, N - 1], x1, y1, x2, y2. The RoI coordinates are in the coordinate system of the input image. For inputs corresponding to a single image, batch index can be excluded to have just 4 columns.",
|
|
|
+ "name": "RoIs"
|
|
|
+ }
|
|
|
],
|
|
|
"outputs": [
|
|
|
- { "name": "Y" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "description": "4D output of shape (R, C, pooled_h, pooled_w). The r-th batch element is a pooled feature map cooresponding to the r-th RoI.",
|
|
|
+ "name": "Y"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "BBoxTransform",
|
|
|
"schema": {
|
|
|
+ "attributes": [
|
|
|
+ {
|
|
|
+ "description": "vector<float> weights [wx, wy, ww, wh] for the deltas",
|
|
|
+ "name": "weights",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "bool (default true), transform the boxes to the scaled image space after applying the bbox deltas.Set to false to match the detectron code, set to true for keypoint models and for backward compatibility",
|
|
|
+ "name": "apply_scale",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "bool (default false), Correct bounding box transform coordates, see bbox_transform() in boxes.py Set to true to match the detectron code, set to false for backward compatibility",
|
|
|
+ "name": "correct_transform_coords",
|
|
|
+ "option": "optional"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "description": "\nTransform proposal bounding boxes to target bounding box using bounding box\n regression deltas.\n",
|
|
|
"inputs": [
|
|
|
- { "name": "rois" },
|
|
|
- { "name": "deltas" },
|
|
|
- { "name": "im_info" }
|
|
|
+ {
|
|
|
+ "description": "Bounding box proposals in pixel coordinates, Size (M, 4), format [x1, y1, x2, y2], orSize (M, 5), format [batch_index, x1, y1, x2, y2]. If proposals from multiple images in a batch are present, they should be grouped sequentially and in incremental order.",
|
|
|
+ "name": "rois"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "bounding box translations and scales,size (M, 4*K), format [dx, dy, dw, dh], K = # classes",
|
|
|
+ "name": "deltas"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Image dimensions, size (batch_size, 3), format [img_height, img_width, img_scale]",
|
|
|
+ "name": "im_info"
|
|
|
+ }
|
|
|
],
|
|
|
"outputs": [
|
|
|
- { "name": "box_out" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "description": "Pixel coordinates of the transformed bounding boxes,Size (M, 4*K), format [x1, y1, x2, y2]",
|
|
|
+ "name": "box_out"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Tensor of shape (batch_size) with each element denoting the number of RoIs belonging to the corresponding image in batch",
|
|
|
+ "name": "roi_batch_splits"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
},
|
|
|
{
|
|
|
"name": "BoxWithNMSLimit",
|
|
|
"schema": {
|
|
|
+ "attributes": [
|
|
|
+ {
|
|
|
+ "description": "(float) TEST.SCORE_THRESH",
|
|
|
+ "name": "score_thresh",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(float) TEST.NMS",
|
|
|
+ "name": "nms",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(int) TEST.DEECTIONS_PER_IM",
|
|
|
+ "name": "detections_per_im",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(bool) TEST.SOFT_NMS.ENABLED",
|
|
|
+ "name": "soft_nms_enabled",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(string) TEST.SOFT_NMS.METHOD",
|
|
|
+ "name": "soft_nms_method",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(float) TEST.SOFT_NMS.SIGMA",
|
|
|
+ "name": "soft_nms_sigma",
|
|
|
+ "option": "optional"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "(float) Lower bound on updated scores to discard boxes",
|
|
|
+ "name": "soft_nms_min_score_thres",
|
|
|
+ "option": "optional"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "description": "\nApply NMS to each class (except background) and limit the number of\nreturned boxes.\n",
|
|
|
"inputs": [
|
|
|
- { "name": "scores" },
|
|
|
- { "name": "boxes" },
|
|
|
- { "name": "batch_splits" }
|
|
|
+ {
|
|
|
+ "description": "Scores, size (count, num_classes)",
|
|
|
+ "name": "scores"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Bounding box for each class, size (count, num_classes * 4)",
|
|
|
+ "name": "boxes"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Tensor of shape (batch_size) with each element denoting the number of RoIs/boxes belonging to the corresponding image in batch. Sum should add up to total count of scores/boxes.",
|
|
|
+ "name": "batch_splits"
|
|
|
+ }
|
|
|
],
|
|
|
"outputs": [
|
|
|
- { "name": "scores" },
|
|
|
- { "name": "boxes" },
|
|
|
- { "name": "classes" },
|
|
|
- { "name": "batch_splits" },
|
|
|
- { "name": "keeps" },
|
|
|
- { "name": "keeps_size" }
|
|
|
- ]
|
|
|
+ {
|
|
|
+ "description": "Filtered scores, size (n)",
|
|
|
+ "name": "scores"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Filtered boxes, size (n, 4)",
|
|
|
+ "name": "boxes"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Class id for each filtered score/box, size (n)",
|
|
|
+ "name": "classes"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Output batch splits for scores/boxes after applying NMS",
|
|
|
+ "name": "batch_splits"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Optional filtered indices, size (n)",
|
|
|
+ "name": "keeps"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Optional number of filtered indices per class, size (num_classes)",
|
|
|
+ "name": "keeps_size"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "support_level": "core"
|
|
|
}
|
|
|
}
|
|
|
]
|