Преглед на файлове

Update onnx-metadata.json

Lutz Roeder преди 1 месец
родител
ревизия
0e28981d40
променен е 1 файл, в който са добавени 21 реда и са изтрити 3 реда
  1. +21 -3
      source/onnx-metadata.json

+ 21 - 3
source/onnx-metadata.json

@@ -45400,7 +45400,7 @@
     "name": "QMoE",
     "module": "com.microsoft",
     "version": 1,
-    "description": "Quantized mixture of experts (MoE).\n\n      Only weights are quantized with symmetric quantization.\n      The quantized weights are stored in column major order per expert.\n      The quantization block size can be specified. If not provided, column wise quantization is used.\n\n      The SwiGLU (Swish-Gated Linear Unit) activation function is like:\n         g = xW + b\n         l = xV + c\n         G = clamp(g, max=limit)\n         L = clamp(l, min=-limit, max=limit)\n         swiglu = G * sigmoid(alpha * G) * (L + beta)\n      where x is the input, W and V are weight matrices, b and c are bias vectors, and alpha, beta and limit are constant float parameters.\n      When swiglu_fusion=0, two GEMMs are not fused, and they are FC1 and FC3 in the inputs.\n      When swiglu_fusion=1, two GEMMs are fused so that g and l are computed in a single GEMM (FC1), and g and l are interleaved on each row of size 2 * inter_size.\n      When swiglu_fusion=2, two GEMMs are fused, and g and l are concatenated on each row.\n      ",
+    "description": "Quantized mixture of experts (MoE).\n\n      The quantized weights are stored in column major order per expert.\n      The quantization block size can be specified. If not provided, column wise quantization is used.\n\n      The formula of linear dequantization of the quantized weights using scale and (optionally) zero-point is:\n        dequantized_weight = (quantized_weight - zero_point) * scale\n      When zero_point is not provided, the default value is 2^(bits-1): 8 for 4 bits, 128 for 8 bits.\n\n      If block_size is provided, both hidden_size and inter_size must be divisible by the block size, and\n      the dequantization is performed per block of size block_size along the K (input feature) dimension.\n\n      If block_size and zero_point are provided, both hidden_size and inter_size must be divisible by block_size * pack_size,\n      where pack_size = 8 / expert_weight_bits.\n\n      The SwiGLU (Swish-Gated Linear Unit) activation function is like:\n         g = xW + b\n         l = xV + c\n         G = clamp(g, max=limit)\n         L = clamp(l, min=-limit, max=limit)\n         swiglu = G * sigmoid(alpha * G) * (L + beta)\n      where x is the input, W and V are weight matrices, b and c are bias vectors, and alpha, beta and limit are constant float parameters.\n      When swiglu_fusion=0, two GEMMs are not fused, and they are FC1 and FC3 in the inputs.\n      When swiglu_fusion=1, two GEMMs are fused so that g and l are computed in a single GEMM (FC1), and g and l are interleaved on each row of size 2 * inter_size.\n      When swiglu_fusion=2, two GEMMs are fused, and g and l are concatenated on each row.\n      ",
     "attributes": [
       {
         "name": "activation_alpha",
@@ -45527,10 +45527,28 @@
         "type": "T",
         "option": "optional",
         "description": "2D optional tensor with shape (num_experts, inter_size)"
+      },
+      {
+        "name": "fc1_zero_points",
+        "type": "T1",
+        "option": "optional",
+        "description": "2D tensor with shape (num_experts, fusion_size * inter_size / pack_size), or 3D tensor with shape (num_experts, fusion_size * inter_size, hidden_size / block_size / pack_size) when block_size is provided."
+      },
+      {
+        "name": "fc2_zero_points",
+        "type": "T1",
+        "option": "optional",
+        "description": "2D tensor with shape (num_experts, hidden_size / pack_size), or 3D tensor with shape (num_experts, hidden_size, inter_size / block_size / pack_size) when block_size is provided."
+      },
+      {
+        "name": "fc3_zero_points",
+        "type": "T1",
+        "option": "optional",
+        "description": "2D optional tensor with shape (num_experts, inter_size / pack_size), or 3D optional tensor with shape (num_experts, inter_size, hidden_size / block_size / pack_size) when block_size is provided."
       }
     ],
     "min_input": 7,
-    "max_input": 11,
+    "max_input": 14,
     "outputs": [
       {
         "name": "output",
@@ -45540,7 +45558,7 @@
     ],
     "min_output": 1,
     "max_output": 1,
-    "inputs_range": "7 - 11",
+    "inputs_range": "7 - 14",
     "type_constraints": [
       {
         "description": "Constrain input and output types to float tensors.",