|
|
@@ -6573,6 +6573,144 @@
|
|
|
}
|
|
|
]
|
|
|
},
|
|
|
+ {
|
|
|
+ "name": "BitCast",
|
|
|
+ "module": "ai.onnx",
|
|
|
+ "version": 26,
|
|
|
+ "description": "Reinterprets the binary representation of a tensor as a different data type,\nspecified by the 'to' attribute. Unlike Cast, BitCast preserves the exact bit\npattern without any value conversion.\n\nThe target data type must have the same bit-width as the input data type.\nThe output tensor has the same shape as the input tensor.\nAll types except string are supported. Implementations must treat the\nunderlying bytes as little endian.\n",
|
|
|
+ "attributes": [
|
|
|
+ {
|
|
|
+ "name": "to",
|
|
|
+ "type": "int64",
|
|
|
+ "required": true,
|
|
|
+ "description": "The data type to which the input tensor is bitwise reinterpreted. Must be one of the non-string types from DataType enum in TensorProto. The target type must have the same bit-width as the input type."
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "inputs": [
|
|
|
+ {
|
|
|
+ "name": "input",
|
|
|
+ "type": "T1",
|
|
|
+ "description": "Input tensor to be bitcast."
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "min_input": 1,
|
|
|
+ "max_input": 1,
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "output",
|
|
|
+ "type": "T2",
|
|
|
+ "description": "Output tensor with the same shape as the input."
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "min_output": 1,
|
|
|
+ "max_output": 1,
|
|
|
+ "type_constraints": [
|
|
|
+ {
|
|
|
+ "description": "Constrain input types. Bitcasting from string is not supported.",
|
|
|
+ "type_param_str": "T1",
|
|
|
+ "allowed_type_strs": [
|
|
|
+ "tensor(uint8)",
|
|
|
+ "tensor(uint16)",
|
|
|
+ "tensor(uint32)",
|
|
|
+ "tensor(uint64)",
|
|
|
+ "tensor(int8)",
|
|
|
+ "tensor(int16)",
|
|
|
+ "tensor(int32)",
|
|
|
+ "tensor(int64)",
|
|
|
+ "tensor(bfloat16)",
|
|
|
+ "tensor(float16)",
|
|
|
+ "tensor(float)",
|
|
|
+ "tensor(double)",
|
|
|
+ "tensor(bool)",
|
|
|
+ "tensor(complex64)",
|
|
|
+ "tensor(complex128)",
|
|
|
+ "tensor(float8e4m3fn)",
|
|
|
+ "tensor(float8e4m3fnuz)",
|
|
|
+ "tensor(float8e5m2)",
|
|
|
+ "tensor(float8e5m2fnuz)",
|
|
|
+ "tensor(uint4)",
|
|
|
+ "tensor(int4)",
|
|
|
+ "tensor(float4e2m1)",
|
|
|
+ "tensor(float8e8m0)",
|
|
|
+ "tensor(uint2)",
|
|
|
+ "tensor(int2)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Constrain output types. Bitcasting to string is not supported.",
|
|
|
+ "type_param_str": "T2",
|
|
|
+ "allowed_type_strs": [
|
|
|
+ "tensor(uint8)",
|
|
|
+ "tensor(uint16)",
|
|
|
+ "tensor(uint32)",
|
|
|
+ "tensor(uint64)",
|
|
|
+ "tensor(int8)",
|
|
|
+ "tensor(int16)",
|
|
|
+ "tensor(int32)",
|
|
|
+ "tensor(int64)",
|
|
|
+ "tensor(bfloat16)",
|
|
|
+ "tensor(float16)",
|
|
|
+ "tensor(float)",
|
|
|
+ "tensor(double)",
|
|
|
+ "tensor(bool)",
|
|
|
+ "tensor(complex64)",
|
|
|
+ "tensor(complex128)",
|
|
|
+ "tensor(float8e4m3fn)",
|
|
|
+ "tensor(float8e4m3fnuz)",
|
|
|
+ "tensor(float8e5m2)",
|
|
|
+ "tensor(float8e5m2fnuz)",
|
|
|
+ "tensor(uint4)",
|
|
|
+ "tensor(int4)",
|
|
|
+ "tensor(float4e2m1)",
|
|
|
+ "tensor(float8e8m0)",
|
|
|
+ "tensor(uint2)",
|
|
|
+ "tensor(int2)"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "examples": [
|
|
|
+ {
|
|
|
+ "summary": "bitcast_2d_float32_to_int32",
|
|
|
+ "code": "\"\"\"Test bitcasting 2D array from float32 to int32.\"\"\"\nnode = onnx.helper.make_node(\n \"BitCast\",\n inputs=[\"x\"],\n outputs=[\"y\"],\n to=onnx.TensorProto.INT32,\n)\nx = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)\ny = x.view(np.int32)\nexpect(node, inputs=[x], outputs=[y], name=\"test_bitcast_2d_float32_to_int32\")"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "summary": "bitcast_bool_to_uint8",
|
|
|
+ "code": "\"\"\"Test bitcasting from bool to uint8 (same size).\"\"\"\nnode = onnx.helper.make_node(\n \"BitCast\",\n inputs=[\"x\"],\n outputs=[\"y\"],\n to=onnx.TensorProto.UINT8,\n)\nx = np.array([True, False, True, False], dtype=np.bool_)\ny = x.view(np.uint8)\nexpect(node, inputs=[x], outputs=[y], name=\"test_bitcast_bool_to_uint8\")"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "summary": "bitcast_float32_to_int32",
|
|
|
+ "code": "\"\"\"Test bitcasting from float32 to int32 (same size).\"\"\"\nnode = onnx.helper.make_node(\n \"BitCast\",\n inputs=[\"x\"],\n outputs=[\"y\"],\n to=onnx.TensorProto.INT32,\n)\nx = np.array([1.0, -2.5, 3.75], dtype=np.float32)\ny = x.view(np.int32)\nexpect(node, inputs=[x], outputs=[y], name=\"test_bitcast_float32_to_int32\")"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "summary": "bitcast_float64_to_int64",
|
|
|
+ "code": "\"\"\"Test bitcasting from float64 to int64 (same size).\"\"\"\nnode = onnx.helper.make_node(\n \"BitCast\",\n inputs=[\"x\"],\n outputs=[\"y\"],\n to=onnx.TensorProto.INT64,\n)\nx = np.array([1.0, -2.5, 3.75], dtype=np.float64)\ny = x.view(np.int64)\nexpect(node, inputs=[x], outputs=[y], name=\"test_bitcast_float64_to_int64\")"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "summary": "bitcast_int32_to_float32",
|
|
|
+ "code": "\"\"\"Test bitcasting from int32 to float32 (same size).\"\"\"\nnode = onnx.helper.make_node(\n \"BitCast\",\n inputs=[\"x\"],\n outputs=[\"y\"],\n to=onnx.TensorProto.FLOAT,\n)\nx = np.array([1065353216, -1071644672, 1081081856], dtype=np.int32)\ny = x.view(np.float32)\nexpect(node, inputs=[x], outputs=[y], name=\"test_bitcast_int32_to_float32\")"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "summary": "bitcast_int64_to_float64",
|
|
|
+ "code": "\"\"\"Test bitcasting from int64 to float64 (same size).\"\"\"\nnode = onnx.helper.make_node(\n \"BitCast\",\n inputs=[\"x\"],\n outputs=[\"y\"],\n to=onnx.TensorProto.DOUBLE,\n)\nx = np.array(\n [4607182418800017408, -4611686018427387904, 4614256656552045184],\n dtype=np.int64,\n)\ny = x.view(np.float64)\nexpect(node, inputs=[x], outputs=[y], name=\"test_bitcast_int64_to_float64\")"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "summary": "bitcast_int8_to_uint8",
|
|
|
+ "code": "\"\"\"Test bitcasting from int8 to uint8 (same size, different signedness).\"\"\"\nnode = onnx.helper.make_node(\n \"BitCast\",\n inputs=[\"x\"],\n outputs=[\"y\"],\n to=onnx.TensorProto.UINT8,\n)\nx = np.array([-1, -128, 127, 0], dtype=np.int8)\ny = x.view(np.uint8)\nexpect(node, inputs=[x], outputs=[y], name=\"test_bitcast_int8_to_uint8\")"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "summary": "bitcast_scalar_float32_to_int32",
|
|
|
+ "code": "\"\"\"Test bitcasting scalar from float32 to int32.\"\"\"\nnode = onnx.helper.make_node(\n \"BitCast\",\n inputs=[\"x\"],\n outputs=[\"y\"],\n to=onnx.TensorProto.INT32,\n)\nx = np.array(1.0, dtype=np.float32)\ny = x.view(np.int32)\nexpect(\n node, inputs=[x], outputs=[y], name=\"test_bitcast_scalar_float32_to_int32\"\n)"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "summary": "bitcast_uint16_to_int16",
|
|
|
+ "code": "\"\"\"Test bitcasting from uint16 to int16 (same size, different signedness).\"\"\"\nnode = onnx.helper.make_node(\n \"BitCast\",\n inputs=[\"x\"],\n outputs=[\"y\"],\n to=onnx.TensorProto.INT16,\n)\nx = np.array([1, 32768, 65535], dtype=np.uint16)\ny = x.view(np.int16)\nexpect(node, inputs=[x], outputs=[y], name=\"test_bitcast_uint16_to_int16\")"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "summary": "bitcast_uint32_to_int32",
|
|
|
+ "code": "\"\"\"Test bitcasting from uint32 to int32 (same size, different signedness).\"\"\"\nnode = onnx.helper.make_node(\n \"BitCast\",\n inputs=[\"x\"],\n outputs=[\"y\"],\n to=onnx.TensorProto.INT32,\n)\nx = np.array([4294967295, 2147483648, 2147483647], dtype=np.uint32)\ny = x.view(np.int32)\nexpect(node, inputs=[x], outputs=[y], name=\"test_bitcast_uint32_to_int32\")"
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ },
|
|
|
{
|
|
|
"name": "BitShift",
|
|
|
"module": "ai.onnx",
|
|
|
@@ -25302,7 +25440,7 @@
|
|
|
"name": "GroupQueryAttention",
|
|
|
"module": "com.microsoft",
|
|
|
"version": 1,
|
|
|
- "description": "Group Query Self/Cross Attention.\n\n*Highly recommend using k-v cache share buffer for both CPU and CUDA. Enabled through IOBinding past and present kv.\nSupports different number of heads for q and kv for CPU and CUDA.\nOnly supports causal and local attention.\nSupports rotary position embedding for CPU and CUDA.\nSupports packed input for CPU and CUDA.\nSupports continuous decoding for batch_size == 1 for CPU and CUDA.\n\n",
|
|
|
+ "description": "Group Query Self/Cross Attention with KV Cache Quantization Support.\n\nThis operator implements causal grouped-query attention with past state (KV cache) support.\nIt also supports optional float8, int8 or int4 quantization for the KV cache to reduce memory footprint.\n\n**Cache Format:**\nThe past and present KV cache tensors are expected in a BNSH format: `(batch_size, num_heads, cache_sequence_length, head_size)`, where `cache_sequence_length` is the length of the cached key/value sequences, or the maximum sequence length when past and present buffer sharing is used.\n\n**Quantization:**\nWhen quantization is enabled, `past_key` and `past_value` inputs can be of type `float8e4m3fn`, `uint8` or `int8`. The corresponding `k_scale` and `v_scale` tensors must be provided.\nThe operator will output `present_key` and `present_value` in the same format as the `past_key` and `past_value`.\n\nFor 4-bit quantization, the data type is uint8 where each byte contains two 4-bit values. The bit width of quantized KV cache can be set using the `kv_cache_bit_width` attribute.\n\nThe shapes of the k_scale, v_scale tensors shall be broadcastable to present_key shape.\n\n**Quantization Modes (`k_quant_type`, `v_quant_type` attributes):**\n- **\"NONE\"**: No quantization.\n- **\"PER_TENSOR\"**: A single scale for the entire tensor. Scale example shape: `[1]`.\n- **\"PER_CHANNEL\"**: A scale for each channel. Scale example shape: `[1, num_heads_k, 1, head_size]`.\n",
|
|
|
"attributes": [
|
|
|
{
|
|
|
"name": "do_rotary",
|
|
|
@@ -25310,6 +25448,19 @@
|
|
|
"required": false,
|
|
|
"description": "Whether to use rotary position embedding. Default value is 0."
|
|
|
},
|
|
|
+ {
|
|
|
+ "name": "k_quant_type",
|
|
|
+ "type": "string",
|
|
|
+ "required": false,
|
|
|
+ "default": "NONE",
|
|
|
+ "description": "Quantization type for K cache. One of 'NONE', 'PER_TENSOR', 'PER_CHANNEL'."
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "name": "kv_cache_bit_width",
|
|
|
+ "type": "int64",
|
|
|
+ "required": false,
|
|
|
+ "description": "Bit width of quantized KV cache. Supported values are 8 and 4."
|
|
|
+ },
|
|
|
{
|
|
|
"name": "kv_num_heads",
|
|
|
"type": "int64",
|
|
|
@@ -25359,6 +25510,13 @@
|
|
|
"type": "float32",
|
|
|
"required": false,
|
|
|
"description": "Softcap value for attention weights. Default value is 0."
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "name": "v_quant_type",
|
|
|
+ "type": "string",
|
|
|
+ "required": false,
|
|
|
+ "default": "NONE",
|
|
|
+ "description": "Quantization type for V cache. One of 'NONE', 'PER_TENSOR', 'PER_CHANNEL'."
|
|
|
}
|
|
|
],
|
|
|
"inputs": [
|
|
|
@@ -25381,13 +25539,13 @@
|
|
|
},
|
|
|
{
|
|
|
"name": "past_key",
|
|
|
- "type": "T",
|
|
|
+ "type": "T_CACHE",
|
|
|
"option": "optional",
|
|
|
"description": "past state key with support for format BNSH. When past_key uses same tensor as present_key(k-v cache), it is of length max_sequence_length... otherwise of length past_sequence_length."
|
|
|
},
|
|
|
{
|
|
|
"name": "past_value",
|
|
|
- "type": "T",
|
|
|
+ "type": "T_CACHE",
|
|
|
"option": "optional",
|
|
|
"description": "past state value with support for format BNSH. When past_value uses same tensor as present_value(k-v cache), it is of length max_sequence_length... otherwise of length past_sequence_length."
|
|
|
},
|
|
|
@@ -25430,10 +25588,22 @@
|
|
|
"type": "T",
|
|
|
"option": "optional",
|
|
|
"description": "1D tensor with shape (num_heads). Each head has a smooth factor adding to the denominator of softmax."
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "name": "k_scale",
|
|
|
+ "type": "T_KV_SCALE",
|
|
|
+ "option": "optional",
|
|
|
+ "description": "Scale tensor for past_key."
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "name": "v_scale",
|
|
|
+ "type": "T_KV_SCALE",
|
|
|
+ "option": "optional",
|
|
|
+ "description": "Scale tensor for past_value."
|
|
|
}
|
|
|
],
|
|
|
"min_input": 7,
|
|
|
- "max_input": 12,
|
|
|
+ "max_input": 14,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"name": "output",
|
|
|
@@ -25442,12 +25612,12 @@
|
|
|
},
|
|
|
{
|
|
|
"name": "present_key",
|
|
|
- "type": "T",
|
|
|
+ "type": "T_CACHE",
|
|
|
"description": "present state key with support for format BNSH. When past_key uses same tensor as present_key(k-v buffer), it is of length max_sequence_length... otherwise of length past_sequence_length +kv_sequence_length."
|
|
|
},
|
|
|
{
|
|
|
"name": "present_value",
|
|
|
- "type": "T",
|
|
|
+ "type": "T_CACHE",
|
|
|
"description": "present state value with support for format BNSH. When past_value uses same tensor as present_value(k-v buffer), it is of length max_sequence_length... otherwise of length past_sequence_length +kv_sequence_length."
|
|
|
},
|
|
|
{
|
|
|
@@ -25459,7 +25629,7 @@
|
|
|
],
|
|
|
"min_output": 3,
|
|
|
"max_output": 4,
|
|
|
- "inputs_range": "7 - 12",
|
|
|
+ "inputs_range": "7 - 14",
|
|
|
"outputs_range": "3 - 4",
|
|
|
"type_constraints": [
|
|
|
{
|
|
|
@@ -25471,6 +25641,25 @@
|
|
|
"tensor(float)"
|
|
|
]
|
|
|
},
|
|
|
+ {
|
|
|
+ "description": "Constrain KV cache types.",
|
|
|
+ "type_param_str": "T_CACHE",
|
|
|
+ "allowed_type_strs": [
|
|
|
+ "tensor(float)",
|
|
|
+ "tensor(float16)",
|
|
|
+ "tensor(bfloat16)",
|
|
|
+ "tensor(uint8)",
|
|
|
+ "tensor(int8)",
|
|
|
+ "tensor(float8e4m3fn)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "description": "Constrain KV cache scale types.",
|
|
|
+ "type_param_str": "T_KV_SCALE",
|
|
|
+ "allowed_type_strs": [
|
|
|
+ "tensor(float)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
{
|
|
|
"description": "Constrain mask to int tensor.",
|
|
|
"type_param_str": "M",
|