|
|
@@ -1445,7 +1445,7 @@
|
|
|
{
|
|
|
"name": "amdg.buffer_load",
|
|
|
"summary": "Load from a scalar base pointer and a tensor offset",
|
|
|
- "description": "AMD Buffer load operation. Buffer store is similar to\n a normal store but it accesses global memory via a scalar base pointer\n and a tensor of offsets instead of a tensor of pointers. The other fields\n are similar to a normal load, i.e., the `mask` is a boolean vector that\n determines if a given element should be read from memory, and `other` is the\n element that should be returned on lane `i` when `mask[i] == 0`.\n Stride is the distance between the beginning of contiguous memory chunks.\n When performing a load of a block, the `stride` is the address difference between\n the first elements of each row in bytes. Compiler tries to obtain the `stride`\n when it converts to the buffer ops because it is important for optimizing\n the cache memory access.",
|
|
|
+ "description": "AMD Buffer load operation. Buffer load is similar to\n a normal load but it accesses global memory via a scalar base pointer\n and a tensor of offsets instead of a tensor of pointers. The other fields\n are similar to a normal load, i.e., the `mask` is a boolean vector that\n determines if a given element should be read from memory, and `other` is the\n element that should be returned on lane `i` when `mask[i] == 0`.\n Stride is the distance between the beginning of contiguous memory chunks.\n When performing a load of a block, the `stride` is the address difference between\n the first elements of each row in bytes. Compiler tries to obtain the `stride`\n when it converts to the buffer ops because it is important for optimizing\n the cache memory access.\n Contiguity is the maximum number of elements that can be loaded in a single vector\n with the given layout and mask.\n This allows buffer_load to be used even if the alignment cannot be proven based on IR.",
|
|
|
"inputs": [
|
|
|
{ "name": "ptr", "type": "TT_Ptr" },
|
|
|
{ "name": "offsets", "type": "I32Tensor" },
|
|
|
@@ -1457,7 +1457,8 @@
|
|
|
{ "name": "result", "type": "TT_Tensor" }
|
|
|
],
|
|
|
"attributes": [
|
|
|
- { "name": "cache", "type": "DefaultValuedAttr<TT_CacheModifierAttr{none|ca|cg|wb|cs|wt|cv}, ::mlir::triton::CacheModifier::NONE>" }
|
|
|
+ { "name": "cache", "type": "DefaultValuedAttr<TT_CacheModifierAttr{none|ca|cg|wb|cs|wt|cv}, ::mlir::triton::CacheModifier::NONE>" },
|
|
|
+ { "name": "contiguity", "type": "DefaultValuedAttr<I32Attr, 1>" }
|
|
|
],
|
|
|
"assemblyFormat": "$ptr `[` $offsets `]` (`,` $mask^)? (`,` $other^)?\n oilist(`cacheModifier` `=` $cache)\n (`stride` `=` $stride^)?\n attr-dict `:` type($result)"
|
|
|
},
|
|
|
@@ -1485,7 +1486,7 @@
|
|
|
{
|
|
|
"name": "amdg.buffer_store",
|
|
|
"summary": "Store into scalar base pointer and a tensor offset",
|
|
|
- "description": "AMD Buffer store operation. Buffer store is similar to\n normal store but it accesses global memory via a scalar base pointer\n and a tensor of offsets instead of a tensor of pointers. The other fields\n are similar to a normal store , i.e., the `mask` is a boolean vector that\n determines if a given element should be written to memory, and `value` is the\n tensor of elements that should be written on lane `i` when `mask[i] == 1`.\n Stride is the distance between the beginning of contiguous memory chunks.\n When performing a block store, the `stride` is the address difference between\n the first elements of each row in bytes. Compiler tries to obtain the `stride`\n when it converts to the buffer ops because it is important for optimizing\n the cache memory access.",
|
|
|
+ "description": "AMD Buffer store operation. Buffer store is similar to\n normal store but it accesses global memory via a scalar base pointer\n and a tensor of offsets instead of a tensor of pointers. The other fields\n are similar to a normal store, i.e., the `mask` is a boolean vector that\n determines if a given element should be written to memory, and `value` is the\n tensor of elements that should be written on lane `i` when `mask[i] == 1`.\n Stride is the distance between the beginning of contiguous memory chunks.\n When performing a block store, the `stride` is the address difference between\n the first elements of each row in bytes. Compiler tries to obtain the `stride`\n when it converts to the buffer ops because it is important for optimizing\n the cache memory access.\n Contiguity is the maximum number of elements that can be stored in a single vector\n with the given layout and mask.\n This allows buffer_store to be used even if the alignment cannot be proven based on IR.",
|
|
|
"inputs": [
|
|
|
{ "name": "value", "type": "TT_Tensor" },
|
|
|
{ "name": "ptr", "type": "TT_Ptr" },
|
|
|
@@ -1494,7 +1495,8 @@
|
|
|
{ "name": "mask", "type": "Optional<TT_BoolTensor>" }
|
|
|
],
|
|
|
"attributes": [
|
|
|
- { "name": "cache", "type": "DefaultValuedAttr<TT_CacheModifierAttr{none|ca|cg|wb|cs|wt|cv}, mlir::triton::CacheModifier::NONE>" }
|
|
|
+ { "name": "cache", "type": "DefaultValuedAttr<TT_CacheModifierAttr{none|ca|cg|wb|cs|wt|cv}, mlir::triton::CacheModifier::NONE>" },
|
|
|
+ { "name": "contiguity", "type": "DefaultValuedAttr<I32Attr, 1>" }
|
|
|
],
|
|
|
"assemblyFormat": "$value `,` $ptr `[` $offsets `]` (`,` $mask^)?\n oilist(`cacheModifier` `=` $cache)\n (`stride` `=` $stride^)?\n attr-dict `:` type($value)"
|
|
|
},
|
|
|
@@ -6573,7 +6575,7 @@
|
|
|
"attributes": [
|
|
|
{ "name": "exception", "type": "UnitAttr" },
|
|
|
{ "name": "callee", "type": "OptionalAttr<FlatSymbolRefAttr>" },
|
|
|
- { "name": "calling_conv", "type": "DefaultValuedAttr<CIR_CallingConv{c|spir_kernel|spir_function|opencl_kernel|ptx_kernel}, CallingConv::C>" },
|
|
|
+ { "name": "calling_conv", "type": "DefaultValuedAttr<CIR_CallingConv{c|spir_kernel|spir_function|opencl_kernel|ptx_kernel|amdgpu_kernel}, CallingConv::C>" },
|
|
|
{ "name": "side_effect", "type": "DefaultValuedAttr<CIR_SideEffect{all|pure|const}, SideEffect::All>" },
|
|
|
{ "name": "extra_attrs", "type": "CIR_ExtraFuncAttr" },
|
|
|
{ "name": "ast", "type": "OptionalAttr<ASTCallExprInterface>" }
|
|
|
@@ -7192,7 +7194,7 @@
|
|
|
{ "name": "cold", "type": "UnitAttr" },
|
|
|
{ "name": "dso_local", "type": "UnitAttr" },
|
|
|
{ "name": "linkage", "type": "DefaultValuedAttr<CIR_GlobalLinkageKind{external|available_externally|linkonce|linkonce_odr|weak|weak_odr|internal|cir_private|extern_weak|common}, GlobalLinkageKind::ExternalLinkage>" },
|
|
|
- { "name": "calling_conv", "type": "DefaultValuedAttr<CIR_CallingConv{c|spir_kernel|spir_function|opencl_kernel|ptx_kernel}, CallingConv::C>" },
|
|
|
+ { "name": "calling_conv", "type": "DefaultValuedAttr<CIR_CallingConv{c|spir_kernel|spir_function|opencl_kernel|ptx_kernel|amdgpu_kernel}, CallingConv::C>" },
|
|
|
{ "name": "extra_attrs", "type": "CIR_ExtraFuncAttr" },
|
|
|
{ "name": "sym_visibility", "type": "OptionalAttr<StrAttr>" },
|
|
|
{ "name": "comdat", "type": "UnitAttr" },
|
|
|
@@ -8116,7 +8118,7 @@
|
|
|
],
|
|
|
"attributes": [
|
|
|
{ "name": "callee", "type": "OptionalAttr<FlatSymbolRefAttr>" },
|
|
|
- { "name": "calling_conv", "type": "DefaultValuedAttr<CIR_CallingConv{c|spir_kernel|spir_function|opencl_kernel|ptx_kernel}, CallingConv::C>" },
|
|
|
+ { "name": "calling_conv", "type": "DefaultValuedAttr<CIR_CallingConv{c|spir_kernel|spir_function|opencl_kernel|ptx_kernel|amdgpu_kernel}, CallingConv::C>" },
|
|
|
{ "name": "side_effect", "type": "DefaultValuedAttr<CIR_SideEffect{all|pure|const}, SideEffect::All>" },
|
|
|
{ "name": "extra_attrs", "type": "CIR_ExtraFuncAttr" },
|
|
|
{ "name": "ast", "type": "OptionalAttr<ASTCallExprInterface>" }
|
|
|
@@ -9983,12 +9985,12 @@
|
|
|
"summary": "Opaque call operation",
|
|
|
"description": "The `emitc.call_opaque` operation represents a C++ function call. The callee\n can be an arbitrary non-empty string. The call allows specifying order\n of operands and attributes in the call as follows:\n\n - integer value of index type refers to an operand;\n - attribute which will get lowered to constant value in call;\n\n Example:\n\n ```mlir\n // Custom form defining a call to `foo()`.\n %0 = emitc.call_opaque \"foo\" () : () -> i32\n\n // Generic form of the same operation.\n %0 = \"emitc.call_opaque\"() {callee = \"foo\"} : () -> i32\n ```",
|
|
|
"inputs": [
|
|
|
- { "name": "args", "type": "OptionalAttr<ArrayAttr>" },
|
|
|
- { "name": "template_args", "type": "OptionalAttr<ArrayAttr>" },
|
|
|
{ "name": "operands", "type": "Variadic<EmitCType>" }
|
|
|
],
|
|
|
"attributes": [
|
|
|
- { "name": "callee", "type": "StrAttr" }
|
|
|
+ { "name": "callee", "type": "StrAttr" },
|
|
|
+ { "name": "args", "type": "OptionalAttr<ArrayAttr>" },
|
|
|
+ { "name": "template_args", "type": "OptionalAttr<ArrayAttr>" }
|
|
|
],
|
|
|
"assemblyFormat": "$callee `(` $operands `)` attr-dict `:` functional-type($operands, results)"
|
|
|
},
|
|
|
@@ -24655,7 +24657,9 @@
|
|
|
"summary": "BroadcastInDim operation",
|
|
|
"description": "Expands the dimensions and/or rank of an input tensor by duplicating the\n data in the `operand` tensor and produces a `result` tensor.\n\n See:\n https://github.com/openxla/stablehlo/blob/main/docs/spec.md#broadcast_in_dim\n\n Example:\n ```mlir\n %result = mhlo.broadcast_in_dim %operand, dims = [2, 1] : (tensor<1x3xi32>) -> tensor<2x3x2xi32>\n ```",
|
|
|
"inputs": [
|
|
|
- { "name": "operand", "type": "MHLO_Tensor" },
|
|
|
+ { "name": "operand", "type": "MHLO_Tensor" }
|
|
|
+ ],
|
|
|
+ "attributes": [
|
|
|
{ "name": "broadcast_dimensions", "type": "MHLO_BroadcastDimAttr" }
|
|
|
]
|
|
|
},
|
|
|
@@ -25043,10 +25047,10 @@
|
|
|
"description": "This operation is functionally identical to\n [broadcast_in_dim](https://github.com/openxla/stablehlo/blob/main/docs/spec.md#broadcast_in_dim)\n op, but the result shape is specified dynamically via `output_dimensions`.\n\n It also accepts optional attributes to express static knowledge about the\n expanding behavior of dimensions. If not specified, all dimensions are\n assumed to be possibly expanding. The sets of dimensions that are known to\n be expanding and the set of dimensions that are known to be non-expanding\n must be disjoint and they must be a subset of the operand's dimensions.\n\n See: https://github.com/openxla/stablehlo/blob/main/docs/spec.md#dynamic_broadcast_in_dim\n\n Example:\n ```mlir\n %operand = mhlo.constant dense<[[1, 2, 3]]> : tensor<1x3xi64>\n %output_dimensions = mhlo.constant dense<[2, 3, 2]> : tensor<3xi64>\n %result = \"mhlo.dynamic_broadcast_in_dim\"(%operand, %output_dimensions) {\n broadcast_dimensions = array<i64: 2, 1>,\n known_expanding_dimensions = array<i64: 0>,\n known_nonexpanding_dimensions = array<i64: 1>\n } : (tensor<1x3xi64>, tensor<3xi64>) -> tensor<2x3x2xi64>\n ```",
|
|
|
"inputs": [
|
|
|
{ "name": "operand", "type": "MHLO_Tensor" },
|
|
|
- { "name": "output_dimensions", "type": "MHLO_DimensionTensor" },
|
|
|
- { "name": "broadcast_dimensions", "type": "MHLO_BroadcastDimAttr" }
|
|
|
+ { "name": "output_dimensions", "type": "MHLO_DimensionTensor" }
|
|
|
],
|
|
|
"attributes": [
|
|
|
+ { "name": "broadcast_dimensions", "type": "MHLO_BroadcastDimAttr" },
|
|
|
{ "name": "known_expanding_dimensions", "type": "OptionalAttr<MHLO_BroadcastDimAttr>" },
|
|
|
{ "name": "known_nonexpanding_dimensions", "type": "OptionalAttr<MHLO_BroadcastDimAttr>" }
|
|
|
]
|
|
|
@@ -30748,16 +30752,18 @@
|
|
|
"summary": "cancel directive",
|
|
|
"description": "The cancel construct activates cancellation of the innermost enclosing\n region of the type specified.",
|
|
|
"inputs": [
|
|
|
- { "name": "cancel_directive", "type": "CancellationConstructTypeAttr" },
|
|
|
{ "name": "if_expr", "type": "Optional<I1>" }
|
|
|
],
|
|
|
+ "attributes": [
|
|
|
+ { "name": "cancel_directive", "type": "CancellationConstructTypeAttr" }
|
|
|
+ ],
|
|
|
"assemblyFormat": "`cancellation_construct_type` `(`\n custom<ClauseAttr>($cancel_directive) `)` oilist(`if` `(` $if_expr `)`) attr-dict"
|
|
|
},
|
|
|
{
|
|
|
"name": "omp.cancellation_point",
|
|
|
"summary": "cancellation point directive",
|
|
|
"description": "The cancellation point construct introduces a user-defined cancellation\n point at which implicit or explicit tasks check if cancellation of the\n innermost enclosing region of the type specified has been activated.",
|
|
|
- "inputs": [
|
|
|
+ "attributes": [
|
|
|
{ "name": "cancel_directive", "type": "CancellationConstructTypeAttr" }
|
|
|
],
|
|
|
"assemblyFormat": "`cancellation_construct_type` `(`\n custom<ClauseAttr>($cancel_directive) `)` attr-dict"
|
|
|
@@ -57169,12 +57175,10 @@
|
|
|
},
|
|
|
{
|
|
|
"name": "test.op_with_effects_a",
|
|
|
- "inputs": [
|
|
|
- { "name": "optional_symbol", "type": "OptionalAttr<SymbolRefAttr>" }
|
|
|
- ],
|
|
|
"attributes": [
|
|
|
{ "name": "first", "type": "FlatSymbolRefAttr" },
|
|
|
- { "name": "second", "type": "SymbolRefAttr" }
|
|
|
+ { "name": "second", "type": "SymbolRefAttr" },
|
|
|
+ { "name": "optional_symbol", "type": "OptionalAttr<SymbolRefAttr>" }
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
@@ -105087,13 +105091,11 @@
|
|
|
"name": "util.global.address",
|
|
|
"summary": "Returns an address reference to a global.",
|
|
|
"description": "Returns the address of a global as a typed reference. Can be used with the\n global load and store indirect ops.",
|
|
|
- "inputs": [
|
|
|
- { "name": "global", "type": "Util_GlobalRefAttr" }
|
|
|
- ],
|
|
|
"outputs": [
|
|
|
{ "name": "result", "type": "Util_AnyGlobalPtr" }
|
|
|
],
|
|
|
"attributes": [
|
|
|
+ { "name": "global", "type": "Util_GlobalRefAttr" },
|
|
|
{ "name": "is_immutable", "type": "UnitAttr" }
|
|
|
],
|
|
|
"assemblyFormat": "(`immutable` $is_immutable^)?\n $global attr-dict `:` qualified(type($result))",
|
|
|
@@ -105103,13 +105105,11 @@
|
|
|
"name": "util.global.load",
|
|
|
"summary": "Loads a value from a global variable.",
|
|
|
"description": "Returns a global variable value. |is_immutable| is a reflection of the\n mutability of the loaded global to minimize the need to traverse symbol\n tables.",
|
|
|
- "inputs": [
|
|
|
- { "name": "global", "type": "Util_GlobalRefAttr" }
|
|
|
- ],
|
|
|
"outputs": [
|
|
|
{ "name": "result", "type": "AnyType" }
|
|
|
],
|
|
|
"attributes": [
|
|
|
+ { "name": "global", "type": "Util_GlobalRefAttr" },
|
|
|
{ "name": "is_immutable", "type": "UnitAttr" }
|
|
|
],
|
|
|
"assemblyFormat": "(`immutable` $is_immutable^)?\n $global attr-dict `:` type($result)",
|
|
|
@@ -105136,7 +105136,9 @@
|
|
|
"summary": "Stores a value into a global variable.",
|
|
|
"description": "Stores a copy of the value into a global variable.",
|
|
|
"inputs": [
|
|
|
- { "name": "value", "type": "AnyType" },
|
|
|
+ { "name": "value", "type": "AnyType" }
|
|
|
+ ],
|
|
|
+ "attributes": [
|
|
|
{ "name": "global", "type": "Util_GlobalRefAttr" }
|
|
|
],
|
|
|
"assemblyFormat": "$value `,` $global attr-dict `:` type($value)",
|