diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index 6e8a9018d0a25..2473fe933ffcb 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -1,79 +1,88 @@ // RUN: mlir-opt %s -convert-vector-to-llvm -split-input-file | FileCheck %s -// TODO: Add tests for for vector.type_cast that would cover scalable vectors +//===----------------------------------------------------------------------===// +// vector.bitcast +//===----------------------------------------------------------------------===// -func.func @bitcast_f32_to_i32_vector_0d(%input: vector) -> vector { - %0 = vector.bitcast %input : vector to vector +func.func @bitcast_f32_to_i32_vector_0d(%arg0: vector) -> vector { + %0 = vector.bitcast %arg0 : vector to vector return %0 : vector } // CHECK-LABEL: @bitcast_f32_to_i32_vector_0d -// CHECK-SAME: %[[input:.*]]: vector -// CHECK: %[[vec_f32_1d:.*]] = builtin.unrealized_conversion_cast %[[input]] : vector to vector<1xf32> -// CHECK: %[[vec_i32_1d:.*]] = llvm.bitcast %[[vec_f32_1d]] : vector<1xf32> to vector<1xi32> -// CHECK: %[[vec_i32_0d:.*]] = builtin.unrealized_conversion_cast %[[vec_i32_1d]] : vector<1xi32> to vector -// CHECK: return %[[vec_i32_0d]] : vector +// CHECK-SAME: %[[ARG_0:.*]]: vector +// CHECK: %[[VEC_F32_1D:.*]] = builtin.unrealized_conversion_cast %[[ARG_0]] : vector to vector<1xf32> +// CHECK: %[[VEC_I32_1D:.*]] = llvm.bitcast %[[VEC_F32_1D]] : vector<1xf32> to vector<1xi32> +// CHECK: %[[VEC_I32_0D:.*]] = builtin.unrealized_conversion_cast %[[VEC_I32_1D]] : vector<1xi32> to vector +// CHECK: return %[[VEC_I32_0D]] : vector // ----- -func.func @bitcast_f32_to_i32_vector(%input: vector<16xf32>) -> vector<16xi32> { - %0 = vector.bitcast %input : vector<16xf32> to vector<16xi32> +func.func @bitcast_f32_to_i32_vector(%arg0: vector<16xf32>) -> vector<16xi32> { + %0 = vector.bitcast %arg0 : vector<16xf32> 
to vector<16xi32> return %0 : vector<16xi32> } + // CHECK-LABEL: @bitcast_f32_to_i32_vector -// CHECK-SAME: %[[input:.*]]: vector<16xf32> -// CHECK: llvm.bitcast %[[input]] : vector<16xf32> to vector<16xi32> +// CHECK-SAME: %[[ARG_0:.*]]: vector<16xf32> +// CHECK: llvm.bitcast %[[ARG_0]] : vector<16xf32> to vector<16xi32> + +// ----- -func.func @bitcast_f32_to_i32_vector_scalable(%input: vector<[16]xf32>) -> vector<[16]xi32> { - %0 = vector.bitcast %input : vector<[16]xf32> to vector<[16]xi32> +func.func @bitcast_f32_to_i32_vector_scalable(%arg0: vector<[16]xf32>) -> vector<[16]xi32> { + %0 = vector.bitcast %arg0 : vector<[16]xf32> to vector<[16]xi32> return %0 : vector<[16]xi32> } // CHECK-LABEL: @bitcast_f32_to_i32_vector_scalable -// CHECK-SAME: %[[input:.*]]: vector<[16]xf32> -// CHECK: llvm.bitcast %[[input]] : vector<[16]xf32> to vector<[16]xi32> +// CHECK-SAME: %[[ARG_0:.*]]: vector<[16]xf32> +// CHECK: llvm.bitcast %[[ARG_0]] : vector<[16]xf32> to vector<[16]xi32> // ----- -func.func @bitcast_i8_to_f32_vector(%input: vector<64xi8>) -> vector<16xf32> { - %0 = vector.bitcast %input : vector<64xi8> to vector<16xf32> +func.func @bitcast_i8_to_f32_vector(%arg0: vector<64xi8>) -> vector<16xf32> { + %0 = vector.bitcast %arg0 : vector<64xi8> to vector<16xf32> return %0 : vector<16xf32> } // CHECK-LABEL: @bitcast_i8_to_f32_vector -// CHECK-SAME: %[[input:.*]]: vector<64xi8> -// CHECK: llvm.bitcast %[[input]] : vector<64xi8> to vector<16xf32> +// CHECK-SAME: %[[ARG_0:.*]]: vector<64xi8> +// CHECK: llvm.bitcast %[[ARG_0]] : vector<64xi8> to vector<16xf32> -func.func @bitcast_i8_to_f32_vector_scalable(%input: vector<[64]xi8>) -> vector<[16]xf32> { - %0 = vector.bitcast %input : vector<[64]xi8> to vector<[16]xf32> +// ----- + +func.func @bitcast_i8_to_f32_vector_scalable(%arg0: vector<[64]xi8>) -> vector<[16]xf32> { + %0 = vector.bitcast %arg0 : vector<[64]xi8> to vector<[16]xf32> return %0 : vector<[16]xf32> } // CHECK-LABEL: @bitcast_i8_to_f32_vector_scalable -// 
CHECK-SAME: %[[input:.*]]: vector<[64]xi8> -// CHECK: llvm.bitcast %[[input]] : vector<[64]xi8> to vector<[16]xf32> +// CHECK-SAME: %[[ARG_0:.*]]: vector<[64]xi8> +// CHECK: llvm.bitcast %[[ARG_0]] : vector<[64]xi8> to vector<[16]xf32> // ----- -func.func @bitcast_index_to_i8_vector(%input: vector<16xindex>) -> vector<128xi8> { - %0 = vector.bitcast %input : vector<16xindex> to vector<128xi8> +func.func @bitcast_index_to_i8_vector(%arg0: vector<16xindex>) -> vector<128xi8> { + %0 = vector.bitcast %arg0 : vector<16xindex> to vector<128xi8> return %0 : vector<128xi8> } // CHECK-LABEL: @bitcast_index_to_i8_vector -// CHECK-SAME: %[[input:.*]]: vector<16xindex> -// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[input]] : vector<16xindex> to vector<16xi64> +// CHECK-SAME: %[[ARG_0:.*]]: vector<16xindex> +// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[ARG_0]] : vector<16xindex> to vector<16xi64> // CHECK: llvm.bitcast %[[T0]] : vector<16xi64> to vector<128xi8> -func.func @bitcast_index_to_i8_vector_scalable(%input: vector<[16]xindex>) -> vector<[128]xi8> { - %0 = vector.bitcast %input : vector<[16]xindex> to vector<[128]xi8> +// ----- + +func.func @bitcast_index_to_i8_vector_scalable(%arg0: vector<[16]xindex>) -> vector<[128]xi8> { + %0 = vector.bitcast %arg0 : vector<[16]xindex> to vector<[128]xi8> return %0 : vector<[128]xi8> } // CHECK-LABEL: @bitcast_index_to_i8_vector_scalable -// CHECK-SAME: %[[input:.*]]: vector<[16]xindex> -// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[input]] : vector<[16]xindex> to vector<[16]xi64> +// CHECK-SAME: %[[ARG_0:.*]]: vector<[16]xindex> +// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[ARG_0]] : vector<[16]xindex> to vector<[16]xi64> // CHECK: llvm.bitcast %[[T0]] : vector<[16]xi64> to vector<[128]xi8> // ----- @@ -110,6 +119,10 @@ func.func @bitcast_2d_scalable(%arg0: vector<2x[4]xi32>) -> vector<2x[2]xi64> { // ----- 
+//===----------------------------------------------------------------------===// +// vector.broadcast +//===----------------------------------------------------------------------===// + func.func @broadcast_vec0d_from_f32(%arg0: f32) -> vector { %0 = vector.broadcast %arg0 : f32 to vector return %0 : vector @@ -142,6 +155,7 @@ func.func @broadcast_vec1d_from_f32(%arg0: f32) -> vector<2xf32> { // CHECK: %[[T1:.*]] = llvm.shufflevector %[[T0]] // CHECK: return %[[T1]] : vector<2xf32> +// ----- func.func @broadcast_vec1d_from_f32_scalable(%arg0: f32) -> vector<[2]xf32> { %0 = vector.broadcast %arg0 : f32 to vector<[2]xf32> @@ -167,6 +181,8 @@ func.func @broadcast_vec1d_from_index(%arg0: index) -> vector<2xindex> { // CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<2xi64> to vector<2xindex> // CHECK: return %[[T2]] : vector<2xindex> +// ----- + func.func @broadcast_vec1d_from_index_scalable(%arg0: index) -> vector<[2]xindex> { %0 = vector.broadcast %arg0 : index to vector<[2]xindex> return %0 : vector<[2]xindex> @@ -194,6 +210,8 @@ func.func @broadcast_vec2d_from_scalar(%arg0: f32) -> vector<2x3xf32> { // CHECK: %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T3]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32> // CHECK: return %[[T4]] : vector<2x3xf32> +// ----- + func.func @broadcast_vec2d_from_scalar_scalable(%arg0: f32) -> vector<2x[3]xf32> { %0 = vector.broadcast %arg0 : f32 to vector<2x[3]xf32> return %0 : vector<2x[3]xf32> @@ -223,6 +241,7 @@ func.func @broadcast_vec3d_from_scalar(%arg0: f32) -> vector<2x3x4xf32> { // CHECK: %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T3]] : !llvm.array<2 x array<3 x vector<4xf32>>> to vector<2x3x4xf32> // CHECK: return %[[T4]] : vector<2x3x4xf32> +// ----- func.func @broadcast_vec3d_from_scalar_scalable(%arg0: f32) -> vector<2x3x[4]xf32> { %0 = vector.broadcast %arg0 : f32 to vector<2x3x[4]xf32> @@ -248,6 +267,8 @@ func.func @broadcast_vec1d_from_vec1d(%arg0: vector<2xf32>) -> 
vector<2xf32> { // CHECK-SAME: %[[A:.*]]: vector<2xf32>) // CHECK: return %[[A]] : vector<2xf32> +// ----- + func.func @broadcast_vec1d_from_vec1d_scalable(%arg0: vector<[2]xf32>) -> vector<[2]xf32> { %0 = vector.broadcast %arg0 : vector<[2]xf32> to vector<[2]xf32> return %0 : vector<[2]xf32> @@ -293,6 +314,8 @@ func.func @broadcast_vec2d_from_vec1d(%arg0: vector<2xf32>) -> vector<3x2xf32> { // CHECK: %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : !llvm.array<3 x vector<2xf32>> to vector<3x2xf32> // CHECK: return %[[T5]] : vector<3x2xf32> +// ----- + func.func @broadcast_vec2d_from_vec1d_scalable(%arg0: vector<[2]xf32>) -> vector<3x[2]xf32> { %0 = vector.broadcast %arg0 : vector<[2]xf32> to vector<3x[2]xf32> return %0 : vector<3x[2]xf32> @@ -323,6 +346,8 @@ func.func @broadcast_vec2d_from_index_vec1d(%arg0: vector<2xindex>) -> vector<3x // CHECK: %[[T4:.*]] = builtin.unrealized_conversion_cast %{{.*}} : !llvm.array<3 x vector<2xi64>> to vector<3x2xindex> // CHECK: return %[[T4]] : vector<3x2xindex> +// ----- + func.func @broadcast_vec2d_from_index_vec1d_scalable(%arg0: vector<[2]xindex>) -> vector<3x[2]xindex> { %0 = vector.broadcast %arg0 : vector<[2]xindex> to vector<3x[2]xindex> return %0 : vector<3x[2]xindex> @@ -362,6 +387,8 @@ func.func @broadcast_vec3d_from_vec1d(%arg0: vector<2xf32>) -> vector<4x3x2xf32> // CHECK: %[[T11:.*]] = builtin.unrealized_conversion_cast %[[T10]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32> // CHECK: return %[[T11]] : vector<4x3x2xf32> +// ----- + func.func @broadcast_vec3d_from_vec1d_scalable(%arg0: vector<[2]xf32>) -> vector<4x3x[2]xf32> { %0 = vector.broadcast %arg0 : vector<[2]xf32> to vector<4x3x[2]xf32> return %0 : vector<4x3x[2]xf32> @@ -403,6 +430,8 @@ func.func @broadcast_vec3d_from_vec2d(%arg0: vector<3x2xf32>) -> vector<4x3x2xf3 // CHECK: %[[T10:.*]] = builtin.unrealized_conversion_cast %[[T9]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32> // CHECK: return %[[T10]] : 
vector<4x3x2xf32> +// ----- + func.func @broadcast_vec3d_from_vec2d_scalable(%arg0: vector<3x[2]xf32>) -> vector<4x3x[2]xf32> { %0 = vector.broadcast %arg0 : vector<3x[2]xf32> to vector<4x3x[2]xf32> return %0 : vector<4x3x[2]xf32> @@ -434,6 +463,8 @@ func.func @broadcast_stretch(%arg0: vector<1xf32>) -> vector<4xf32> { // CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]] // CHECK: return %[[T4]] : vector<4xf32> +// ----- + func.func @broadcast_stretch_scalable(%arg0: vector<1xf32>) -> vector<[4]xf32> { %0 = vector.broadcast %arg0 : vector<1xf32> to vector<[4]xf32> return %0 : vector<[4]xf32> @@ -464,6 +495,8 @@ func.func @broadcast_stretch_at_start(%arg0: vector<1x4xf32>) -> vector<3x4xf32> // CHECK: %[[T8:.*]] = builtin.unrealized_conversion_cast %[[T7]] : !llvm.array<3 x vector<4xf32>> to vector<3x4xf32> // CHECK: return %[[T8]] : vector<3x4xf32> +// ----- + func.func @broadcast_stretch_at_start_scalable(%arg0: vector<1x[4]xf32>) -> vector<3x[4]xf32> { %0 = vector.broadcast %arg0 : vector<1x[4]xf32> to vector<3x[4]xf32> return %0 : vector<3x[4]xf32> @@ -564,6 +597,8 @@ func.func @broadcast_stretch_in_middle(%arg0: vector<4x1x2xf32>) -> vector<4x3x2 // CHECK: %[[T32:.*]] = builtin.unrealized_conversion_cast %[[T31]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32> // CHECK: return %[[T32]] : vector<4x3x2xf32> +// ----- + func.func @broadcast_stretch_in_middle_scalable_v1(%arg0: vector<4x1x[2]xf32>) -> vector<4x3x[2]xf32> { %0 = vector.broadcast %arg0 : vector<4x1x[2]xf32> to vector<4x3x[2]xf32> return %0 : vector<4x3x[2]xf32> @@ -598,6 +633,8 @@ func.func @broadcast_stretch_in_middle_scalable_v1(%arg0: vector<4x1x[2]xf32>) - // CHECK: %[[T32:.*]] = builtin.unrealized_conversion_cast %[[T31]] : !llvm.array<4 x array<3 x vector<[2]xf32>>> to vector<4x3x[2]xf32> // CHECK: return %[[T32]] : vector<4x3x[2]xf32> +// ----- + // TODO: Add support for scalable vectors func.func @broadcast_stretch_in_middle_scalable_v2(%arg0: vector<[4]x1x2xf32>) -> 
vector<[4]x3x2xf32> { @@ -610,6 +647,10 @@ func.func @broadcast_stretch_in_middle_scalable_v2(%arg0: vector<[4]x1x2xf32>) - // ----- +//===----------------------------------------------------------------------===// +// vector.outerproduct +//===----------------------------------------------------------------------===// + func.func @outerproduct(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<2x3xf32> { %2 = vector.outerproduct %arg0, %arg1 : vector<2xf32>, vector<3xf32> return %2 : vector<2x3xf32> @@ -634,6 +675,8 @@ func.func @outerproduct(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<2x // CHECK: %[[T14:.*]] = builtin.unrealized_conversion_cast %[[T13]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32> // CHECK: return %[[T14]] : vector<2x3xf32> +// ----- + func.func @outerproduct_scalable(%arg0: vector<2xf32>, %arg1: vector<[3]xf32>) -> vector<2x[3]xf32> { %2 = vector.outerproduct %arg0, %arg1 : vector<2xf32>, vector<[3]xf32> return %2 : vector<2x[3]xf32> @@ -679,6 +722,8 @@ func.func @outerproduct_index(%arg0: vector<2xindex>, %arg1: vector<3xindex>) -> // CHECK: %[[T7:.*]] = builtin.unrealized_conversion_cast %[[T6]] : vector<3xindex> to vector<3xi64> // CHECK: %{{.*}} = llvm.insertvalue %[[T7]], %[[T8]][0] : !llvm.array<2 x vector<3xi64>> +// ----- + func.func @outerproduct_index_scalable(%arg0: vector<2xindex>, %arg1: vector<[3]xindex>) -> vector<2x[3]xindex> { %2 = vector.outerproduct %arg0, %arg1 : vector<2xindex>, vector<[3]xindex> return %2 : vector<2x[3]xindex> @@ -728,6 +773,8 @@ func.func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: v // CHECK: %[[T19:.*]] = builtin.unrealized_conversion_cast %[[T18]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32> // CHECK: return %[[T19]] : vector<2x3xf32> +// ----- + func.func @outerproduct_add_scalable(%arg0: vector<2xf32>, %arg1: vector<[3]xf32>, %arg2: vector<2x[3]xf32>) -> vector<2x[3]xf32> { %2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<2xf32>, 
vector<[3]xf32> return %2 : vector<2x[3]xf32> @@ -758,6 +805,10 @@ func.func @outerproduct_add_scalable(%arg0: vector<2xf32>, %arg1: vector<[3]xf32 // ----- +//===----------------------------------------------------------------------===// +// vector.mask { vector.outerproduct } +//===----------------------------------------------------------------------===// + func.func @masked_float_add_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: vector<2xf32>, %m: vector<2xi1>) -> vector<2xf32> { %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind} : vector<2xf32>, f32 } : vector<2xi1> -> vector<2xf32> return %0 : vector<2xf32> @@ -768,6 +819,8 @@ func.func @masked_float_add_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: v // CHECK: %[[VAL_8:.*]] = llvm.intr.fmuladd(%[[VAL_0]], %{{.*}}, %[[VAL_2]]) : (vector<2xf32>, vector<2xf32>, vector<2xf32>) -> vector<2xf32> // CHECK: %[[VAL_9:.*]] = arith.select %[[VAL_3]], %[[VAL_8]], %[[VAL_2]] : vector<2xi1>, vector<2xf32> +// ----- + func.func @masked_float_add_outerprod_scalable(%arg0: vector<[2]xf32>, %arg1: f32, %arg2: vector<[2]xf32>, %m: vector<[2]xi1>) -> vector<[2]xf32> { %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32> return %0 : vector<[2]xf32> @@ -791,6 +844,8 @@ func.func @masked_float_mul_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: v // CHECK: %[[VAL_9:.*]] = arith.mulf %[[VAL_8]], %[[VAL_2]] : vector<2xf32> // CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xf32> +// ----- + func.func @masked_float_mul_outerprod_scalable(%arg0: vector<[2]xf32>, %arg1: f32, %arg2: vector<[2]xf32>, %m: vector<[2]xi1>) -> vector<[2]xf32> { %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32> return %0 : vector<[2]xf32> @@ -815,6 +870,8 @@ func.func 
@masked_float_max_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: v // CHECK: %[[VAL_9:.*]] = arith.maxnumf %[[VAL_8]], %[[VAL_2]] : vector<2xf32> // CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xf32> +// ----- + func.func @masked_float_max_outerprod_scalable(%arg0: vector<[2]xf32>, %arg1: f32, %arg2: vector<[2]xf32>, %m: vector<[2]xi1>) -> vector<[2]xf32> { %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32> return %0 : vector<[2]xf32> @@ -839,6 +896,8 @@ func.func @masked_float_min_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: v // CHECK: %[[VAL_9:.*]] = arith.minnumf %[[VAL_8]], %[[VAL_2]] : vector<2xf32> // CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xf32> +// ----- + func.func @masked_float_min_outerprod_scalable(%arg0: vector<[2]xf32>, %arg1: f32, %arg2: vector<[2]xf32>, %m: vector<[2]xi1>) -> vector<[2]xf32> { %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32> return %0 : vector<[2]xf32> @@ -863,6 +922,8 @@ func.func @masked_int_add_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vec // CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] : vector<2xi32> // CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32> +// ----- + func.func @masked_int_add_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> { %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32> return %0 : vector<[2]xi32> @@ -887,6 +948,8 @@ func.func @masked_int_mul_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vec // CHECK: %[[VAL_9:.*]] = arith.muli %[[VAL_8]], %[[VAL_2]] : vector<2xi32> // 
CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32> +// ----- + func.func @masked_int_mul_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> { %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32> return %0 : vector<[2]xi32> @@ -911,6 +974,8 @@ func.func @masked_int_max_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vec // CHECK: %[[VAL_9:.*]] = arith.maxsi %[[VAL_8]], %[[VAL_2]] : vector<2xi32> // CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32> +// ----- + func.func @masked_int_max_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> { %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32> return %0 : vector<[2]xi32> @@ -935,6 +1000,8 @@ func.func @masked_int_min_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vec // CHECK: %[[VAL_9:.*]] = arith.minui %[[VAL_8]], %[[VAL_2]] : vector<2xi32> // CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32> +// ----- + func.func @masked_int_min_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> { %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32> return %0 : vector<[2]xi32> @@ -959,6 +1026,8 @@ func.func @masked_int_and_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vec // CHECK: %[[VAL_9:.*]] = arith.andi %[[VAL_8]], %[[VAL_2]] : vector<2xi32> // CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32> +// ----- + func.func 
@masked_int_and_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> { %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32> return %0 : vector<[2]xi32> @@ -983,6 +1052,8 @@ func.func @masked_int_or_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vect // CHECK: %[[VAL_9:.*]] = arith.ori %[[VAL_8]], %[[VAL_2]] : vector<2xi32> // CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32> +// ----- + func.func @masked_int_or_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> { %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32> return %0 : vector<[2]xi32> @@ -996,6 +1067,10 @@ func.func @masked_int_or_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, // ----- +//===----------------------------------------------------------------------===// +// vector.shuffle +//===----------------------------------------------------------------------===// + func.func @shuffle_0D_direct(%arg0: vector) -> vector<3xf32> { %1 = vector.shuffle %arg0, %arg0 [0, 1, 0] : vector, vector return %1 : vector<3xf32> @@ -1037,28 +1112,28 @@ func.func @shuffle_1D(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<5xf3 // CHECK-LABEL: @shuffle_1D( // CHECK-SAME: %[[A:.*]]: vector<2xf32>, // CHECK-SAME: %[[B:.*]]: vector<3xf32>) -// CHECK: %[[u0:.*]] = llvm.mlir.undef : vector<5xf32> -// CHECK: %[[c2:.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: %[[e1:.*]] = llvm.extractelement %[[B]][%[[c2]] : i64] : vector<3xf32> -// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[i1:.*]] = llvm.insertelement %[[e1]], %[[u0]][%[[c0]] : i64] : vector<5xf32> -// CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64 -// 
CHECK: %[[e2:.*]] = llvm.extractelement %[[B]][%[[c1]] : i64] : vector<3xf32> -// CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[i2:.*]] = llvm.insertelement %[[e2]], %[[i1]][%[[c1]] : i64] : vector<5xf32> -// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[e3:.*]] = llvm.extractelement %[[B]][%[[c0]] : i64] : vector<3xf32> -// CHECK: %[[c2:.*]] = llvm.mlir.constant(2 : index) : i64 -// CHECK: %[[i3:.*]] = llvm.insertelement %[[e3]], %[[i2]][%[[c2]] : i64] : vector<5xf32> -// CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[e4:.*]] = llvm.extractelement %[[A]][%[[c1]] : i64] : vector<2xf32> -// CHECK: %[[c3:.*]] = llvm.mlir.constant(3 : index) : i64 -// CHECK: %[[i4:.*]] = llvm.insertelement %[[e4]], %[[i3]][%[[c3]] : i64] : vector<5xf32> -// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[e5:.*]] = llvm.extractelement %[[A]][%[[c0]] : i64] : vector<2xf32> -// CHECK: %[[c4:.*]] = llvm.mlir.constant(4 : index) : i64 -// CHECK: %[[i5:.*]] = llvm.insertelement %[[e5]], %[[i4]][%[[c4]] : i64] : vector<5xf32> -// CHECK: return %[[i5]] : vector<5xf32> +// CHECK: %[[U0:.*]] = llvm.mlir.undef : vector<5xf32> +// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) : i64 +// CHECK: %[[E1:.*]] = llvm.extractelement %[[B]][%[[C2]] : i64] : vector<3xf32> +// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[I1:.*]] = llvm.insertelement %[[E1]], %[[U0]][%[[C0]] : i64] : vector<5xf32> +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[E2:.*]] = llvm.extractelement %[[B]][%[[C1]] : i64] : vector<3xf32> +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[I2:.*]] = llvm.insertelement %[[E2]], %[[I1]][%[[C1]] : i64] : vector<5xf32> +// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[E3:.*]] = llvm.extractelement %[[B]][%[[C0]] : i64] : vector<3xf32> +// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) : i64 +// 
CHECK: %[[I3:.*]] = llvm.insertelement %[[E3]], %[[I2]][%[[C2]] : i64] : vector<5xf32> +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[E4:.*]] = llvm.extractelement %[[A]][%[[C1]] : i64] : vector<2xf32> +// CHECK: %[[C3:.*]] = llvm.mlir.constant(3 : index) : i64 +// CHECK: %[[I4:.*]] = llvm.insertelement %[[E4]], %[[I3]][%[[C3]] : i64] : vector<5xf32> +// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[E5:.*]] = llvm.extractelement %[[A]][%[[C0]] : i64] : vector<2xf32> +// CHECK: %[[C4:.*]] = llvm.mlir.constant(4 : index) : i64 +// CHECK: %[[I5:.*]] = llvm.insertelement %[[E5]], %[[I4]][%[[C4]] : i64] : vector<5xf32> +// CHECK: return %[[I5]] : vector<5xf32> // ----- @@ -1071,18 +1146,22 @@ func.func @shuffle_2D(%a: vector<1x4xf32>, %b: vector<2x4xf32>) -> vector<3x4xf3 // CHECK-SAME: %[[B:.*]]: vector<2x4xf32>) // CHECK-DAG: %[[VAL_0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<1x4xf32> to !llvm.array<1 x vector<4xf32>> // CHECK-DAG: %[[VAL_1:.*]] = builtin.unrealized_conversion_cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> -// CHECK: %[[u0:.*]] = llvm.mlir.undef : !llvm.array<3 x vector<4xf32>> -// CHECK: %[[e1:.*]] = llvm.extractvalue %[[VAL_1]][0] : !llvm.array<2 x vector<4xf32>> -// CHECK: %[[i1:.*]] = llvm.insertvalue %[[e1]], %[[u0]][0] : !llvm.array<3 x vector<4xf32>> -// CHECK: %[[e2:.*]] = llvm.extractvalue %[[VAL_0]][0] : !llvm.array<1 x vector<4xf32>> -// CHECK: %[[i2:.*]] = llvm.insertvalue %[[e2]], %[[i1]][1] : !llvm.array<3 x vector<4xf32>> -// CHECK: %[[e3:.*]] = llvm.extractvalue %[[VAL_1]][1] : !llvm.array<2 x vector<4xf32>> -// CHECK: %[[i3:.*]] = llvm.insertvalue %[[e3]], %[[i2]][2] : !llvm.array<3 x vector<4xf32>> -// CHECK: %[[VAL_3:.*]] = builtin.unrealized_conversion_cast %[[i3]] : !llvm.array<3 x vector<4xf32>> to vector<3x4xf32> +// CHECK: %[[U0:.*]] = llvm.mlir.undef : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[E1:.*]] = llvm.extractvalue %[[VAL_1]][0] : 
!llvm.array<2 x vector<4xf32>> +// CHECK: %[[I1:.*]] = llvm.insertvalue %[[E1]], %[[U0]][0] : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[E2:.*]] = llvm.extractvalue %[[VAL_0]][0] : !llvm.array<1 x vector<4xf32>> +// CHECK: %[[I2:.*]] = llvm.insertvalue %[[E2]], %[[I1]][1] : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[E3:.*]] = llvm.extractvalue %[[VAL_1]][1] : !llvm.array<2 x vector<4xf32>> +// CHECK: %[[I3:.*]] = llvm.insertvalue %[[E3]], %[[I2]][2] : !llvm.array<3 x vector<4xf32>> +// CHECK: %[[VAL_3:.*]] = builtin.unrealized_conversion_cast %[[I3]] : !llvm.array<3 x vector<4xf32>> to vector<3x4xf32> // CHECK: return %[[VAL_3]] : vector<3x4xf32> // ----- +//===----------------------------------------------------------------------===// +// vector.extractelement +//===----------------------------------------------------------------------===// + func.func @extractelement_from_vec_0d_f32(%arg0: vector) -> f32 { %1 = vector.extractelement %arg0[] : vector return %1 : f32 @@ -1100,9 +1179,11 @@ func.func @extractelement_from_vec_1d_f32_idx_as_i32(%arg0: vector<16xf32>) -> f } // CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_i32( // CHECK-SAME: %[[A:.*]]: vector<16xf32>) -// CHECK: %[[c:.*]] = arith.constant 15 : i32 -// CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[c]] : i32] : vector<16xf32> -// CHECK: return %[[x]] : f32 +// CHECK: %[[C:.*]] = arith.constant 15 : i32 +// CHECK: %[[X:.*]] = llvm.extractelement %[[A]][%[[C]] : i32] : vector<16xf32> +// CHECK: return %[[X]] : f32 + +// ----- func.func @extractelement_from_vec_1d_f32_idx_as_i32_scalable(%arg0: vector<[16]xf32>) -> f32 { %0 = arith.constant 15 : i32 @@ -1111,9 +1192,9 @@ func.func @extractelement_from_vec_1d_f32_idx_as_i32_scalable(%arg0: vector<[16] } // CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_i32_scalable( // CHECK-SAME: %[[A:.*]]: vector<[16]xf32>) -// CHECK: %[[c:.*]] = arith.constant 15 : i32 -// CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[c]] : i32] : vector<[16]xf32> 
-// CHECK: return %[[x]] : f32 +// CHECK: %[[C:.*]] = arith.constant 15 : i32 +// CHECK: %[[X:.*]] = llvm.extractelement %[[A]][%[[C]] : i32] : vector<[16]xf32> +// CHECK: return %[[X]] : f32 // ----- func.func @extractelement_from_vec_1d_f32_idx_as_index(%arg0: vector<16xf32>) -> f32 { @@ -1123,10 +1204,12 @@ func.func @extractelement_from_vec_1d_f32_idx_as_index(%arg0: vector<16xf32>) -> } // CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_index( // CHECK-SAME: %[[A:.*]]: vector<16xf32>) -// CHECK: %[[c:.*]] = arith.constant 15 : index -// CHECK: %[[i:.*]] = builtin.unrealized_conversion_cast %[[c]] : index to i64 -// CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[i]] : i64] : vector<16xf32> -// CHECK: return %[[x]] : f32 +// CHECK: %[[C:.*]] = arith.constant 15 : index +// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64 +// CHECK: %[[X:.*]] = llvm.extractelement %[[A]][%[[I]] : i64] : vector<16xf32> +// CHECK: return %[[X]] : f32 + +// ----- func.func @extractelement_from_vec_1d_f32_idx_as_index_scalable(%arg0: vector<[16]xf32>) -> f32 { %0 = arith.constant 15 : index @@ -1135,13 +1218,17 @@ func.func @extractelement_from_vec_1d_f32_idx_as_index_scalable(%arg0: vector<[1 } // CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_index_scalable( // CHECK-SAME: %[[A:.*]]: vector<[16]xf32>) -// CHECK: %[[c:.*]] = arith.constant 15 : index -// CHECK: %[[i:.*]] = builtin.unrealized_conversion_cast %[[c]] : index to i64 -// CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[i]] : i64] : vector<[16]xf32> -// CHECK: return %[[x]] : f32 +// CHECK: %[[C:.*]] = arith.constant 15 : index +// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64 +// CHECK: %[[X:.*]] = llvm.extractelement %[[A]][%[[I]] : i64] : vector<[16]xf32> +// CHECK: return %[[X]] : f32 // ----- +//===----------------------------------------------------------------------===// +// vector.extract 
+//===----------------------------------------------------------------------===// + func.func @extract_scalar_from_vec_1d_f32(%arg0: vector<16xf32>) -> f32 { %0 = vector.extract %arg0[15]: f32 from vector<16xf32> return %0 : f32 @@ -1151,6 +1238,8 @@ func.func @extract_scalar_from_vec_1d_f32(%arg0: vector<16xf32>) -> f32 { // CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32> // CHECK: return {{.*}} : f32 +// ----- + func.func @extract_scalar_from_vec_1d_f32_scalable(%arg0: vector<[16]xf32>) -> f32 { %0 = vector.extract %arg0[15]: f32 from vector<[16]xf32> return %0 : f32 @@ -1173,6 +1262,8 @@ func.func @extract_vec_1e_from_vec_1d_f32(%arg0: vector<16xf32>) -> vector<1xf32 // CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : f32 to vector<1xf32> // CHECK: return %[[T2]] : vector<1xf32> +// ----- + func.func @extract_vec_1e_from_vec_1d_f32_scalable(%arg0: vector<[16]xf32>) -> vector<1xf32> { %0 = vector.extract %arg0[15]: vector<1xf32> from vector<[16]xf32> return %0 : vector<1xf32> @@ -1198,6 +1289,8 @@ func.func @extract_scalar_from_vec_1d_index(%arg0: vector<16xindex>) -> index { // CHECK: %[[T3:.*]] = builtin.unrealized_conversion_cast %[[T2]] : i64 to index // CHECK: return %[[T3]] : index +// ----- + func.func @extract_scalar_from_vec_1d_index_scalable(%arg0: vector<[16]xindex>) -> index { %0 = vector.extract %arg0[15]: index from vector<[16]xindex> return %0 : index @@ -1220,6 +1313,8 @@ func.func @extract_vec_2d_from_vec_3d_f32(%arg0: vector<4x3x16xf32>) -> vector<3 // CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<4 x array<3 x vector<16xf32>>> // CHECK: return {{.*}} : vector<3x16xf32> +// ----- + func.func @extract_vec_2d_from_vec_3d_f32_scalable(%arg0: vector<4x3x[16]xf32>) -> vector<3x[16]xf32> { %0 = vector.extract %arg0[0]: vector<3x[16]xf32> from vector<4x3x[16]xf32> return %0 : vector<3x[16]xf32> @@ -1238,6 +1333,8 @@ func.func @extract_vec_1d_from_vec_3d_f32(%arg0: vector<4x3x16xf32>) -> vector<1 // CHECK: 
llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<16xf32>>> // CHECK: return {{.*}} : vector<16xf32> +// ----- + func.func @extract_vec_1d_from_vec_3d_f32_scalable(%arg0: vector<4x3x[16]xf32>) -> vector<[16]xf32> { %0 = vector.extract %arg0[0, 0]: vector<[16]xf32> from vector<4x3x[16]xf32> return %0 : vector<[16]xf32> @@ -1258,6 +1355,8 @@ func.func @extract_scalar_from_vec_3d_f32(%arg0: vector<4x3x16xf32>) -> f32 { // CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32> // CHECK: return {{.*}} : f32 +// ----- + func.func @extract_scalar_from_vec_3d_f32_scalable(%arg0: vector<4x3x[16]xf32>) -> f32 { %0 = vector.extract %arg0[0, 0, 0]: f32 from vector<4x3x[16]xf32> return %0 : f32 @@ -1279,6 +1378,8 @@ func.func @extract_scalar_from_vec_1d_f32_dynamic_idx(%arg0: vector<16xf32>, %ar // CHECK: %[[UC:.+]] = builtin.unrealized_conversion_cast %[[INDEX]] : index to i64 // CHECK: llvm.extractelement %[[VEC]][%[[UC]] : i64] : vector<16xf32> +// ----- + func.func @extract_scalar_from_vec_1d_f32_dynamic_idx_scalable(%arg0: vector<[16]xf32>, %arg1: index) -> f32 { %0 = vector.extract %arg0[%arg1]: f32 from vector<[16]xf32> return %0 : f32 @@ -1354,6 +1455,10 @@ func.func @extract_scalar_from_vec_0d_index(%arg0: vector) -> index { // ----- +//===----------------------------------------------------------------------===// +// vector.insertelement +//===----------------------------------------------------------------------===// + func.func @insertelement_into_vec_0d_f32(%arg0: f32, %arg1: vector) -> vector { %1 = vector.insertelement %arg0, %arg1[] : vector return %1 : vector @@ -1363,7 +1468,7 @@ func.func @insertelement_into_vec_0d_f32(%arg0: f32, %arg1: vector) -> vect // CHECK: %[[B:.*]] = builtin.unrealized_conversion_cast %{{.*}} : // CHECK: vector to vector<1xf32> // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[x:.*]] = llvm.insertelement %[[A]], %[[B]][%[[C0]] : {{.*}}] : vector<1xf32> +// CHECK: %[[X:.*]] = 
llvm.insertelement %[[A]], %[[B]][%[[C0]] : {{.*}}] : vector<1xf32> // ----- @@ -1375,9 +1480,11 @@ func.func @insertelement_into_vec_1d_f32_idx_as_i32(%arg0: f32, %arg1: vector<4x // CHECK-LABEL: @insertelement_into_vec_1d_f32_idx_as_i32( // CHECK-SAME: %[[A:.*]]: f32, // CHECK-SAME: %[[B:.*]]: vector<4xf32>) -// CHECK: %[[c:.*]] = arith.constant 3 : i32 -// CHECK: %[[x:.*]] = llvm.insertelement %[[A]], %[[B]][%[[c]] : i32] : vector<4xf32> -// CHECK: return %[[x]] : vector<4xf32> +// CHECK: %[[C:.*]] = arith.constant 3 : i32 +// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[C]] : i32] : vector<4xf32> +// CHECK: return %[[X]] : vector<4xf32> + +// ----- func.func @insertelement_into_vec_1d_f32_idx_as_i32_scalable(%arg0: f32, %arg1: vector<[4]xf32>) -> vector<[4]xf32> { %0 = arith.constant 3 : i32 @@ -1387,9 +1494,9 @@ func.func @insertelement_into_vec_1d_f32_idx_as_i32_scalable(%arg0: f32, %arg1: // CHECK-LABEL: @insertelement_into_vec_1d_f32_idx_as_i32_scalable( // CHECK-SAME: %[[A:.*]]: f32, // CHECK-SAME: %[[B:.*]]: vector<[4]xf32>) -// CHECK: %[[c:.*]] = arith.constant 3 : i32 -// CHECK: %[[x:.*]] = llvm.insertelement %[[A]], %[[B]][%[[c]] : i32] : vector<[4]xf32> -// CHECK: return %[[x]] : vector<[4]xf32> +// CHECK: %[[C:.*]] = arith.constant 3 : i32 +// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[C]] : i32] : vector<[4]xf32> +// CHECK: return %[[X]] : vector<[4]xf32> // ----- @@ -1401,10 +1508,12 @@ func.func @insertelement_into_vec_1d_f32_scalable_idx_as_index(%arg0: f32, %arg1 // CHECK-LABEL: @insertelement_into_vec_1d_f32_scalable_idx_as_index( // CHECK-SAME: %[[A:.*]]: f32, // CHECK-SAME: %[[B:.*]]: vector<4xf32>) -// CHECK: %[[c:.*]] = arith.constant 3 : index -// CHECK: %[[i:.*]] = builtin.unrealized_conversion_cast %[[c]] : index to i64 -// CHECK: %[[x:.*]] = llvm.insertelement %[[A]], %[[B]][%[[i]] : i64] : vector<4xf32> -// CHECK: return %[[x]] : vector<4xf32> +// CHECK: %[[C:.*]] = arith.constant 3 : index +// CHECK: %[[I:.*]] 
= builtin.unrealized_conversion_cast %[[C]] : index to i64 +// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[I]] : i64] : vector<4xf32> +// CHECK: return %[[X]] : vector<4xf32> + +// ----- func.func @insertelement_into_vec_1d_f32_scalable_idx_as_index_scalable(%arg0: f32, %arg1: vector<[4]xf32>) -> vector<[4]xf32> { %0 = arith.constant 3 : index @@ -1414,13 +1523,17 @@ func.func @insertelement_into_vec_1d_f32_scalable_idx_as_index_scalable(%arg0: f // CHECK-LABEL: @insertelement_into_vec_1d_f32_scalable_idx_as_index_scalable( // CHECK-SAME: %[[A:.*]]: f32, // CHECK-SAME: %[[B:.*]]: vector<[4]xf32>) -// CHECK: %[[c:.*]] = arith.constant 3 : index -// CHECK: %[[i:.*]] = builtin.unrealized_conversion_cast %[[c]] : index to i64 -// CHECK: %[[x:.*]] = llvm.insertelement %[[A]], %[[B]][%[[i]] : i64] : vector<[4]xf32> -// CHECK: return %[[x]] : vector<[4]xf32> +// CHECK: %[[C:.*]] = arith.constant 3 : index +// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64 +// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[I]] : i64] : vector<[4]xf32> +// CHECK: return %[[X]] : vector<[4]xf32> // ----- +//===----------------------------------------------------------------------===// +// vector.insert +//===----------------------------------------------------------------------===// + func.func @insert_scalar_into_vec_1d_f32(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> { %0 = vector.insert %arg0, %arg1[3] : f32 into vector<4xf32> return %0 : vector<4xf32> @@ -1430,6 +1543,8 @@ func.func @insert_scalar_into_vec_1d_f32(%arg0: f32, %arg1: vector<4xf32>) -> ve // CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32> // CHECK: return {{.*}} : vector<4xf32> +// ----- + func.func @insert_scalar_into_vec_1d_f32_scalable(%arg0: f32, %arg1: vector<[4]xf32>) -> vector<[4]xf32> { %0 = vector.insert %arg0, %arg1[3] : f32 into vector<[4]xf32> return %0 : vector<[4]xf32> @@ -1455,6 +1570,7 @@ func.func 
@insert_scalar_into_vec_1d_index(%arg0: index, %arg1: vector<4xindex>) // CHECK: %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : vector<4xi64> to vector<4xindex> // CHECK: return %[[T5]] : vector<4xindex> +// ----- func.func @insert_scalar_into_vec_1d_index_scalable(%arg0: index, %arg1: vector<[4]xindex>) -> vector<[4]xindex> { %0 = vector.insert %arg0, %arg1[3] : index into vector<[4]xindex> @@ -1480,6 +1596,8 @@ func.func @insert_vec_2d_into_vec_3d_f32(%arg0: vector<8x16xf32>, %arg1: vector< // CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x array<8 x vector<16xf32>>> // CHECK: return {{.*}} : vector<4x8x16xf32> +// ----- + func.func @insert_vec_2d_into_vec_3d_f32_scalable(%arg0: vector<8x[16]xf32>, %arg1: vector<4x8x[16]xf32>) -> vector<4x8x[16]xf32> { %0 = vector.insert %arg0, %arg1[3] : vector<8x[16]xf32> into vector<4x8x[16]xf32> return %0 : vector<4x8x[16]xf32> @@ -1498,6 +1616,8 @@ func.func @insert_vec_1d_into_vec_3d_f32(%arg0: vector<16xf32>, %arg1: vector<4x // CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>> // CHECK: return {{.*}} : vector<4x8x16xf32> +// ----- + func.func @insert_vec_1d_into_vec_3d_f32_scalable(%arg0: vector<[16]xf32>, %arg1: vector<4x8x[16]xf32>) -> vector<4x8x[16]xf32> { %0 = vector.insert %arg0, %arg1[3, 7] : vector<[16]xf32> into vector<4x8x[16]xf32> return %0 : vector<4x8x[16]xf32> @@ -1519,6 +1639,8 @@ func.func @insert_scalar_into_vec_3d_f32(%arg0: f32, %arg1: vector<4x8x16xf32>) // CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>> // CHECK: return {{.*}} : vector<4x8x16xf32> +// ----- + func.func @insert_scalar_into_vec_3d_f32_scalable(%arg0: f32, %arg1: vector<4x8x[16]xf32>) -> vector<4x8x[16]xf32> { %0 = vector.insert %arg0, %arg1[3, 7, 15] : f32 into vector<4x8x[16]xf32> return %0 : vector<4x8x[16]xf32> @@ -1543,6 +1665,8 @@ func.func @insert_scalar_into_vec_1d_f32_dynamic_idx(%arg0: vector<16xf32>, %arg // CHECK: 
%[[UC:.+]] = builtin.unrealized_conversion_cast %[[INDEX]] : index to i64 // CHECK: llvm.insertelement %[[SRC]], %[[DST]][%[[UC]] : i64] : vector<16xf32> +// ----- + func.func @insert_scalar_into_vec_1d_f32_dynamic_idx_scalable(%arg0: vector<[16]xf32>, %arg1: f32, %arg2: index) -> vector<[16]xf32> { %0 = vector.insert %arg1, %arg0[%arg2]: f32 into vector<[16]xf32> @@ -1567,6 +1691,8 @@ func.func @insert_scalar_into_vec_2d_f32_dynamic_idx(%arg0: vector<1x16xf32>, %a // CHECK-LABEL: @insert_scalar_into_vec_2d_f32_dynamic_idx( // CHECK: vector.insert +// ----- + func.func @insert_scalar_into_vec_2d_f32_dynamic_idx_scalable(%arg0: vector<1x[16]xf32>, %arg1: f32, %idx: index) -> vector<1x[16]xf32> { %0 = vector.insert %arg1, %arg0[0, %idx]: f32 into vector<1x[16]xf32> @@ -1580,6 +1706,12 @@ func.func @insert_scalar_into_vec_2d_f32_dynamic_idx_scalable(%arg0: vector<1x[1 // ----- +//===----------------------------------------------------------------------===// +// vector.type_cast +// +// TODO: Add tests for vector.type_cast that would cover scalable vectors +//===----------------------------------------------------------------------===// + func.func @type_cast_f32(%arg0: memref<8x8x8xf32>) -> memref> { %0 = vector.type_cast %arg0: memref<8x8x8xf32> to memref> return %0 : memref> } @@ -1611,11 +1743,11 @@ func.func @type_cast_index(%arg0: memref<8x8x8xindex>) -> memref) -> memref, 3> { +func.func @type_cast_non_zero_addrspace(%arg0: memref<8x8x8xf32, 3>) -> memref, 3> { %0 = vector.type_cast %arg0: memref<8x8x8xf32, 3> to memref, 3> return %0 : memref, 3> } -// CHECK-LABEL: @vector_type_cast_non_zero_addrspace +// CHECK-LABEL: @type_cast_non_zero_addrspace // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64)> // CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<3 x i64>, array<3 x i64>)> // CHECK: llvm.insertvalue %[[allocated]], {{.*}}[0] : !llvm.struct<(ptr<3>, ptr<3>, i64)> @@ -1628,6 +1760,10 @@ func.func 
@vector_type_cast_non_zero_addrspace(%arg0: memref<8x8x8xf32, 3>) -> m // ----- +//===----------------------------------------------------------------------===// +// vector.print +//===----------------------------------------------------------------------===// + func.func @print_scalar_i1(%arg0: i1) { vector.print %arg0 : i1 return @@ -1814,6 +1950,10 @@ func.func @print_string() { // ----- +//===----------------------------------------------------------------------===// +// vector.extract_strided_slice +//===----------------------------------------------------------------------===// + func.func @extract_strided_slice_f32_1d_from_1d(%arg0: vector<4xf32>) -> vector<2xf32> { %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32> return %0 : vector<2xf32> @@ -1857,6 +1997,8 @@ func.func @extract_strided_slice_f32_1d_from_2d(%arg0: vector<4x8xf32>) -> vecto // CHECK: %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : !llvm.array<2 x vector<8xf32>> to vector<2x8xf32> // CHECK: return %[[T5]] +// ----- + func.func @extract_strided_slice_f32_1d_from_2d_scalable(%arg0: vector<4x[8]xf32>) -> vector<2x[8]xf32> { %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4x[8]xf32> to vector<2x[8]xf32> return %0 : vector<2x[8]xf32> @@ -1892,6 +2034,8 @@ func.func @extract_strided_slice_f32_2d_from_2d(%arg0: vector<4x8xf32>) -> vecto // CHECK: %[[VAL_12:.*]] = builtin.unrealized_conversion_cast %[[T7]] : !llvm.array<2 x vector<2xf32>> to vector<2x2xf32> // CHECK: return %[[VAL_12]] : vector<2x2xf32> +// ----- + // NOTE: For scalable vectors, we can only extract "full" scalable dimensions // (e.g. [8] from [8], but not [4] from [8]). 
@@ -1914,6 +2058,10 @@ func.func @extract_strided_slice_f32_2d_from_2d_scalable(%arg0: vector<4x[8]xf32 // ----- +//===----------------------------------------------------------------------===// +// vector.insert_strided_slice +//===----------------------------------------------------------------------===// + func.func @insert_strided_slice_f32_2d_into_3d(%b: vector<4x4xf32>, %c: vector<4x4x4xf32>) -> vector<4x4x4xf32> { %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x4xf32> into vector<4x4x4xf32> return %0 : vector<4x4x4xf32> @@ -1922,6 +2070,8 @@ func.func @insert_strided_slice_f32_2d_into_3d(%b: vector<4x4xf32>, %c: vector<4 // CHECK: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xf32>>> // CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xf32>>> +// ----- + func.func @insert_strided_slice_f32_2d_into_3d_scalable(%b: vector<4x[4]xf32>, %c: vector<4x4x[4]xf32>) -> vector<4x4x[4]xf32> { %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x[4]xf32> into vector<4x4x[4]xf32> return %0 : vector<4x4x[4]xf32> @@ -1940,6 +2090,8 @@ func.func @insert_strided_index_slice_index_2d_into_3d(%b: vector<4x4xindex>, %c // CHECK: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xi64>>> // CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xi64>>> +// ----- + func.func @insert_strided_index_slice_index_2d_into_3d_scalable(%b: vector<4x[4]xindex>, %c: vector<4x4x[4]xindex>) -> vector<4x4x[4]xindex> { %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x[4]xindex> into vector<4x4x[4]xindex> return %0 : vector<4x4x[4]xindex> @@ -1973,6 +2125,8 @@ func.func @insert_strided_slice_f32_2d_into_2d(%a: vector<2x2xf32>, %b: vector<4 // CHECK: %[[R4_3:.*]] = llvm.shufflevector %[[R4_2]], %[[V4_3]] [4, 5, 0, 1] : vector<4xf32> // CHECK: llvm.insertvalue %[[R4_3]], {{.*}}[3] : 
!llvm.array<4 x vector<4xf32>> +// ----- + // NOTE: For scalable dimensions, the corresponding "base" size must match // (i.e. we can only insert "full" scalable dimensions, e.g. [2] into [2], but // not [2] from [4]). @@ -2012,6 +2166,8 @@ func.func @insert_strided_slice_f32_2d_into_3d(%arg0: vector<2x4xf32>, %arg1: ve // CHECK: %[[R8_3:.*]] = llvm.shufflevector %[[R8_2]], %[[V4_0_1]] [8, 9, 0, 1, 2, 3, 14, 15] : vector<8xf32> // CHECK: llvm.insertvalue %[[R8_3]], {{.*}}[1] : !llvm.array<4 x vector<8xf32>> +// ----- + // NOTE: For scalable dimensions, the corresponding "base" size must match // (i.e. we can only insert "full" scalable dimensions, e.g. [4] into [4], but // not [4] from [8]). @@ -2040,74 +2196,84 @@ func.func @insert_strided_slice_f32_2d_into_3d_scalable(%arg0: vector<2x[4]xf32> // ----- -func.func @vector_fma(%a: vector<8xf32>, %b: vector<2x4xf32>, %c: vector<1x1x1xf32>, %d: vector) -> (vector<8xf32>, vector<2x4xf32>, vector<1x1x1xf32>, vector) { - // CHECK-LABEL: @vector_fma - // CHECK-SAME: %[[A:.*]]: vector<8xf32> - // CHECK-SAME: %[[B:.*]]: vector<2x4xf32> - // CHECK-SAME: %[[C:.*]]: vector<1x1x1xf32> - // CHECK: %[[BL:.*]] = builtin.unrealized_conversion_cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> +//===----------------------------------------------------------------------===// +// vector.fma +//===----------------------------------------------------------------------===// + +func.func @fma(%vec_1d: vector<8xf32>, %vec_2d: vector<2x4xf32>, %vec_3d: vector<1x1x1xf32>, %vec_0d: vector) -> (vector<8xf32>, vector<2x4xf32>, vector<1x1x1xf32>, vector) { + // CHECK-LABEL: @fma + // CHECK-SAME: %[[VEC_1D:.*]]: vector<8xf32> + // CHECK-SAME: %[[VEC_2D:.*]]: vector<2x4xf32> + // CHECK-SAME: %[[VEC_3D:.*]]: vector<1x1x1xf32> + // CHECK: %[[VEC_2D_CAST:.*]] = builtin.unrealized_conversion_cast %[[VEC_2D]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> // CHECK: llvm.intr.fmuladd // CHECK-SAME: (vector<8xf32>, vector<8xf32>, 
vector<8xf32>) -> vector<8xf32> - %0 = vector.fma %a, %a, %a : vector<8xf32> + %0 = vector.fma %vec_1d, %vec_1d, %vec_1d : vector<8xf32> - // CHECK: %[[b00:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[b01:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[b02:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[B0:.*]] = llvm.intr.fmuladd(%[[b00]], %[[b01]], %[[b02]]) : + // CHECK: %[[VEC_2D_00:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[VEC_2D_01:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[VEC_2D_02:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[VEC_2D_ADD_1:.*]] = llvm.intr.fmuladd(%[[VEC_2D_00]], %[[VEC_2D_01]], %[[VEC_2D_02]]) : // CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> - // CHECK: llvm.insertvalue %[[B0]], {{.*}}[0] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[b10:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[b11:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[b12:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[B1:.*]] = llvm.intr.fmuladd(%[[b10]], %[[b11]], %[[b12]]) : + // CHECK: llvm.insertvalue %[[VEC_2D_ADD_1]], {{.*}}[0] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[VEC_2D_10:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[VEC_2D_11:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[VEC_2D_12:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<4xf32>> + // CHECK: %[[VEC_2D_ADD_2:.*]] = llvm.intr.fmuladd(%[[VEC_2D_10]], %[[VEC_2D_11]], %[[VEC_2D_12]]) : // CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> - // CHECK: llvm.insertvalue 
%[[B1]], {{.*}}[1] : !llvm.array<2 x vector<4xf32>> - %1 = vector.fma %b, %b, %b : vector<2x4xf32> + // CHECK: llvm.insertvalue %[[VEC_2D_ADD_2]], {{.*}}[1] : !llvm.array<2 x vector<4xf32>> + %1 = vector.fma %vec_2d, %vec_2d, %vec_2d : vector<2x4xf32> // CHECK: %[[C0:.*]] = llvm.intr.fmuladd // CHECK-SAME: (vector<1xf32>, vector<1xf32>, vector<1xf32>) -> vector<1xf32> - %2 = vector.fma %c, %c, %c : vector<1x1x1xf32> + %2 = vector.fma %vec_3d, %vec_3d, %vec_3d : vector<1x1x1xf32> // CHECK: %[[D0:.*]] = llvm.intr.fmuladd // CHECK-SAME: (vector<1xf32>, vector<1xf32>, vector<1xf32>) -> vector<1xf32> - %3 = vector.fma %d, %d, %d : vector + %3 = vector.fma %vec_0d, %vec_0d, %vec_0d : vector return %0, %1, %2, %3: vector<8xf32>, vector<2x4xf32>, vector<1x1x1xf32>, vector } -func.func @vector_fma_scalable(%a: vector<[8]xf32>, %b: vector<2x[4]xf32>, %c: vector<1x1x[1]xf32>, %d: vector) -> (vector<[8]xf32>, vector<2x[4]xf32>, vector<1x1x[1]xf32>) { - // CHECK-LABEL: @vector_fma_scalable - // CHECK-SAME: %[[A:.*]]: vector<[8]xf32> - // CHECK-SAME: %[[B:.*]]: vector<2x[4]xf32> - // CHECK-SAME: %[[C:.*]]: vector<1x1x[1]xf32> - // CHECK: %[[BL:.*]] = builtin.unrealized_conversion_cast %[[B]] : vector<2x[4]xf32> to !llvm.array<2 x vector<[4]xf32>> +// ----- + +func.func @fma_scalable(%vec_1d: vector<[8]xf32>, %vec_2d: vector<2x[4]xf32>, %vec_3d: vector<1x1x[1]xf32>, %vec_0d: vector) -> (vector<[8]xf32>, vector<2x[4]xf32>, vector<1x1x[1]xf32>) { + // CHECK-LABEL: @fma_scalable + // CHECK-SAME: %[[VEC_1D:.*]]: vector<[8]xf32> + // CHECK-SAME: %[[VEC_2D:.*]]: vector<2x[4]xf32> + // CHECK-SAME: %[[VEC_3D:.*]]: vector<1x1x[1]xf32> + // CHECK: %[[VEC_2D_CAST:.*]] = builtin.unrealized_conversion_cast %[[VEC_2D]] : vector<2x[4]xf32> to !llvm.array<2 x vector<[4]xf32>> // CHECK: llvm.intr.fmuladd // CHECK-SAME: (vector<[8]xf32>, vector<[8]xf32>, vector<[8]xf32>) -> vector<[8]xf32> - %0 = vector.fma %a, %a, %a : vector<[8]xf32> + %0 = vector.fma %vec_1d, %vec_1d, %vec_1d : vector<[8]xf32> 
- // CHECK: %[[b00:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<[4]xf32>> - // CHECK: %[[b01:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<[4]xf32>> - // CHECK: %[[b02:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<[4]xf32>> - // CHECK: %[[B0:.*]] = llvm.intr.fmuladd(%[[b00]], %[[b01]], %[[b02]]) : + // CHECK: %[[VEC_2D_00:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<[4]xf32>> + // CHECK: %[[VEC_2D_01:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<[4]xf32>> + // CHECK: %[[VEC_2D_02:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<[4]xf32>> + // CHECK: %[[VEC_2D_ADD_1:.*]] = llvm.intr.fmuladd(%[[VEC_2D_00]], %[[VEC_2D_01]], %[[VEC_2D_02]]) : // CHECK-SAME: (vector<[4]xf32>, vector<[4]xf32>, vector<[4]xf32>) -> vector<[4]xf32> - // CHECK: llvm.insertvalue %[[B0]], {{.*}}[0] : !llvm.array<2 x vector<[4]xf32>> - // CHECK: %[[b10:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<[4]xf32>> - // CHECK: %[[b11:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<[4]xf32>> - // CHECK: %[[b12:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<[4]xf32>> - // CHECK: %[[B1:.*]] = llvm.intr.fmuladd(%[[b10]], %[[b11]], %[[b12]]) : + // CHECK: llvm.insertvalue %[[VEC_2D_ADD_1]], {{.*}}[0] : !llvm.array<2 x vector<[4]xf32>> + // CHECK: %[[VEC_2D_10:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<[4]xf32>> + // CHECK: %[[VEC_2D_11:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<[4]xf32>> + // CHECK: %[[VEC_2D_12:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<[4]xf32>> + // CHECK: %[[VEC_2D_ADD_2:.*]] = llvm.intr.fmuladd(%[[VEC_2D_10]], %[[VEC_2D_11]], %[[VEC_2D_12]]) : // CHECK-SAME: (vector<[4]xf32>, vector<[4]xf32>, vector<[4]xf32>) -> vector<[4]xf32> - // CHECK: llvm.insertvalue %[[B1]], {{.*}}[1] : !llvm.array<2 x vector<[4]xf32>> - %1 = vector.fma %b, %b, %b : 
vector<2x[4]xf32> + // CHECK: llvm.insertvalue %[[VEC_2D_ADD_2]], {{.*}}[1] : !llvm.array<2 x vector<[4]xf32>> + %1 = vector.fma %vec_2d, %vec_2d, %vec_2d : vector<2x[4]xf32> // CHECK: %[[C0:.*]] = llvm.intr.fmuladd // CHECK-SAME: (vector<[1]xf32>, vector<[1]xf32>, vector<[1]xf32>) -> vector<[1]xf32> - %2 = vector.fma %c, %c, %c : vector<1x1x[1]xf32> + %2 = vector.fma %vec_3d, %vec_3d, %vec_3d : vector<1x1x[1]xf32> return %0, %1, %2: vector<[8]xf32>, vector<2x[4]xf32>, vector<1x1x[1]xf32> } // ----- +//===----------------------------------------------------------------------===// +// vector.reduction +//===----------------------------------------------------------------------===// + func.func @reduce_0d_f32(%arg0: vector) -> f32 { %0 = vector.reduction , %arg0 : vector into f32 return %0 : f32 @@ -2133,6 +2299,8 @@ func.func @reduce_f16(%arg0: vector<16xf16>) -> f16 { // CHECK-SAME: <{fastmathFlags = #llvm.fastmath}> : (f16, vector<16xf16>) -> f16 // CHECK: return %[[V]] : f16 +// ----- + func.func @reduce_f16_scalable(%arg0: vector<[16]xf16>) -> f16 { %0 = vector.reduction , %arg0 : vector<[16]xf16> into f16 return %0 : f16 @@ -2157,6 +2325,8 @@ func.func @reduce_f32(%arg0: vector<16xf32>) -> f32 { // CHECK-SAME: <{fastmathFlags = #llvm.fastmath}> : (f32, vector<16xf32>) -> f32 // CHECK: return %[[V]] : f32 +// ----- + func.func @reduce_f32_scalable(%arg0: vector<[16]xf32>) -> f32 { %0 = vector.reduction , %arg0 : vector<[16]xf32> into f32 return %0 : f32 @@ -2181,6 +2351,8 @@ func.func @reduce_f64(%arg0: vector<16xf64>) -> f64 { // CHECK-SAME: <{fastmathFlags = #llvm.fastmath}> : (f64, vector<16xf64>) -> f64 // CHECK: return %[[V]] : f64 +// ----- + func.func @reduce_f64_scalable(%arg0: vector<[16]xf64>) -> f64 { %0 = vector.reduction , %arg0 : vector<[16]xf64> into f64 return %0 : f64 @@ -2203,6 +2375,8 @@ func.func @reduce_i8(%arg0: vector<16xi8>) -> i8 { // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]]) // CHECK: return %[[V]] : i8 +// ----- + 
func.func @reduce_i8_scalable(%arg0: vector<[16]xi8>) -> i8 { %0 = vector.reduction , %arg0 : vector<[16]xi8> into i8 return %0 : i8 @@ -2223,6 +2397,8 @@ func.func @reduce_i32(%arg0: vector<16xi32>) -> i32 { // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]]) // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_i32_scalable(%arg0: vector<[16]xi32>) -> i32 { %0 = vector.reduction , %arg0 : vector<[16]xi32> into i32 return %0 : i32 @@ -2244,6 +2420,8 @@ func.func @reduce_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 { // CHECK: %[[V:.*]] = llvm.add %[[ACC]], %[[R]] // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 { %0 = vector.reduction , %arg0, %arg1 : vector<[16]xi32> into i32 return %0 : i32 @@ -2265,6 +2443,8 @@ func.func @reduce_mul_i32(%arg0: vector<16xi32>) -> i32 { // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.mul"(%[[A]]) // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_mul_i32_scalable(%arg0: vector<[16]xi32>) -> i32 { %0 = vector.reduction , %arg0 : vector<[16]xi32> into i32 return %0 : i32 @@ -2286,6 +2466,8 @@ func.func @reduce_mul_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 { // CHECK: %[[V:.*]] = llvm.mul %[[ACC]], %[[R]] // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_mul_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 { %0 = vector.reduction , %arg0, %arg1 : vector<[16]xi32> into i32 return %0 : i32 @@ -2308,6 +2490,8 @@ func.func @reduce_fmaximum_f32(%arg0: vector<16xf32>, %arg1: f32) -> f32 { // CHECK: %[[R:.*]] = llvm.intr.maximum(%[[V]], %[[B]]) : (f32, f32) -> f32 // CHECK: return %[[R]] : f32 +// ----- + func.func @reduce_fmaximum_f32_scalable(%arg0: vector<[16]xf32>, %arg1: f32) -> f32 { %0 = vector.reduction , %arg0, %arg1 : vector<[16]xf32> into f32 return %0 : f32 @@ -2330,6 +2514,8 @@ func.func @reduce_fminimum_f32(%arg0: vector<16xf32>, %arg1: f32) -> f32 { // CHECK: %[[R:.*]] = 
llvm.intr.minimum(%[[V]], %[[B]]) : (f32, f32) -> f32 // CHECK: return %[[R]] : f32 +// ----- + func.func @reduce_fminimum_f32_scalable(%arg0: vector<[16]xf32>, %arg1: f32) -> f32 { %0 = vector.reduction , %arg0, %arg1 : vector<[16]xf32> into f32 return %0 : f32 @@ -2352,6 +2538,8 @@ func.func @reduce_fmax_f32(%arg0: vector<16xf32>, %arg1: f32) -> f32 { // CHECK: %[[R:.*]] = llvm.intr.maxnum(%[[V]], %[[B]]) : (f32, f32) -> f32 // CHECK: return %[[R]] : f32 +// ----- + func.func @reduce_fmax_f32_scalable(%arg0: vector<[16]xf32>, %arg1: f32) -> f32 { %0 = vector.reduction , %arg0, %arg1 : vector<[16]xf32> into f32 return %0 : f32 @@ -2374,6 +2562,8 @@ func.func @reduce_fmin_f32(%arg0: vector<16xf32>, %arg1: f32) -> f32 { // CHECK: %[[R:.*]] = llvm.intr.minnum(%[[V]], %[[B]]) : (f32, f32) -> f32 // CHECK: return %[[R]] : f32 +// ----- + func.func @reduce_fmin_f32_scalable(%arg0: vector<[16]xf32>, %arg1: f32) -> f32 { %0 = vector.reduction , %arg0, %arg1 : vector<[16]xf32> into f32 return %0 : f32 @@ -2395,6 +2585,8 @@ func.func @reduce_minui_i32(%arg0: vector<16xi32>) -> i32 { // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.umin"(%[[A]]) // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_minui_i32_scalable(%arg0: vector<[16]xi32>) -> i32 { %0 = vector.reduction , %arg0 : vector<[16]xi32> into i32 return %0 : i32 @@ -2417,6 +2609,8 @@ func.func @reduce_minui_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 { // CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]] // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_minui_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 { %0 = vector.reduction , %arg0, %arg1 : vector<[16]xi32> into i32 return %0 : i32 @@ -2439,6 +2633,8 @@ func.func @reduce_maxui_i32(%arg0: vector<16xi32>) -> i32 { // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.umax"(%[[A]]) // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_maxui_i32_scalable(%arg0: vector<[16]xi32>) -> i32 { %0 = vector.reduction , %arg0 : 
vector<[16]xi32> into i32 return %0 : i32 @@ -2461,6 +2657,8 @@ func.func @reduce_maxui_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 { // CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]] // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_maxui_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 { %0 = vector.reduction , %arg0, %arg1 : vector<[16]xi32> into i32 return %0 : i32 @@ -2483,6 +2681,8 @@ func.func @reduce_minsi_i32(%arg0: vector<16xi32>) -> i32 { // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.smin"(%[[A]]) // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_minsi_i32_scalable(%arg0: vector<[16]xi32>) -> i32 { %0 = vector.reduction , %arg0 : vector<[16]xi32> into i32 return %0 : i32 @@ -2505,6 +2705,8 @@ func.func @reduce_minsi_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 { // CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]] // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_minsi_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 { %0 = vector.reduction , %arg0, %arg1 : vector<[16]xi32> into i32 return %0 : i32 @@ -2527,6 +2729,8 @@ func.func @reduce_maxsi_i32(%arg0: vector<16xi32>) -> i32 { // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.smax"(%[[A]]) // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_maxsi_i32_scalable(%arg0: vector<[16]xi32>) -> i32 { %0 = vector.reduction , %arg0 : vector<[16]xi32> into i32 return %0 : i32 @@ -2549,6 +2753,8 @@ func.func @reduce_maxsi_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 { // CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]] // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_maxsi_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 { %0 = vector.reduction , %arg0, %arg1 : vector<[16]xi32> into i32 return %0 : i32 @@ -2571,6 +2777,8 @@ func.func @reduce_and_i32(%arg0: vector<16xi32>) -> i32 { // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.and"(%[[A]]) // CHECK: return %[[V]] : i32 +// ----- + 
func.func @reduce_and_i32_scalable(%arg0: vector<[16]xi32>) -> i32 { %0 = vector.reduction , %arg0 : vector<[16]xi32> into i32 return %0 : i32 @@ -2592,6 +2800,8 @@ func.func @reduce_and_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 { // CHECK: %[[V:.*]] = llvm.and %[[ACC]], %[[R]] // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_and_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 { %0 = vector.reduction , %arg0, %arg1 : vector<[16]xi32> into i32 return %0 : i32 @@ -2613,6 +2823,8 @@ func.func @reduce_or_i32(%arg0: vector<16xi32>) -> i32 { // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.or"(%[[A]]) // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_or_i32_scalable(%arg0: vector<[16]xi32>) -> i32 { %0 = vector.reduction , %arg0 : vector<[16]xi32> into i32 return %0 : i32 @@ -2634,6 +2846,8 @@ func.func @reduce_or_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 { // CHECK: %[[V:.*]] = llvm.or %[[ACC]], %[[R]] // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_or_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 { %0 = vector.reduction , %arg0, %arg1 : vector<[16]xi32> into i32 return %0 : i32 @@ -2655,6 +2869,8 @@ func.func @reduce_xor_i32(%arg0: vector<16xi32>) -> i32 { // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.xor"(%[[A]]) // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_xor_i32_scalable(%arg0: vector<[16]xi32>) -> i32 { %0 = vector.reduction , %arg0 : vector<[16]xi32> into i32 return %0 : i32 @@ -2676,6 +2892,8 @@ func.func @reduce_xor_acc_i32(%arg0: vector<16xi32>, %arg1 : i32) -> i32 { // CHECK: %[[V:.*]] = llvm.xor %[[ACC]], %[[R]] // CHECK: return %[[V]] : i32 +// ----- + func.func @reduce_xor_acc_i32_scalable(%arg0: vector<[16]xi32>, %arg1 : i32) -> i32 { %0 = vector.reduction , %arg0, %arg1 : vector<[16]xi32> into i32 return %0 : i32 @@ -2697,6 +2915,8 @@ func.func @reduce_i64(%arg0: vector<16xi64>) -> i64 { // CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]]) // CHECK: 
return %[[V]] : i64 +// ----- + func.func @reduce_i64_scalable(%arg0: vector<[16]xi64>) -> i64 { %0 = vector.reduction , %arg0 : vector<[16]xi64> into i64 return %0 : i64 @@ -2719,6 +2939,8 @@ func.func @reduce_index(%arg0: vector<16xindex>) -> index { // CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : i64 to index // CHECK: return %[[T2]] : index +// ----- + func.func @reduce_index_scalable(%arg0: vector<[16]xindex>) -> index { %0 = vector.reduction , %arg0 : vector<[16]xindex> into index return %0 : index @@ -2730,9 +2952,13 @@ func.func @reduce_index_scalable(%arg0: vector<[16]xindex>) -> index { // CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : i64 to index // CHECK: return %[[T2]] : index -// 4x16 16x3 4x3 // ----- +//===----------------------------------------------------------------------===// +// vector.matrix_multiply +//===----------------------------------------------------------------------===// + +// 4x16 16x3 4x3 func.func @matrix_ops(%A: vector<64xf64>, %B: vector<48xf64>) -> vector<12xf64> { %C = vector.matrix_multiply %A, %B { lhs_rows = 4: i32, lhs_columns = 16: i32 , rhs_columns = 3: i32 } : @@ -2759,6 +2985,10 @@ func.func @matrix_ops_index(%A: vector<64xindex>, %B: vector<48xindex>) -> vecto // ----- +//===----------------------------------------------------------------------===// +// vector.constant_mask +//===----------------------------------------------------------------------===// + func.func @constant_mask_0d_f() -> vector { %0 = vector.constant_mask [0] : vector return %0 : vector @@ -2852,54 +3082,62 @@ func.func @negative_constant_mask_2d_leading_scalable() -> vector<[4]x4xi1> { // ----- -func.func @create_mask_0d(%a : index) -> vector { - %v = vector.create_mask %a : vector +//===----------------------------------------------------------------------===// +// vector.create_mask +//===----------------------------------------------------------------------===// + +func.func @create_mask_0d(%num_elems : 
index) -> vector { + %v = vector.create_mask %num_elems : vector return %v: vector } // CHECK-LABEL: func @create_mask_0d -// CHECK-SAME: %[[arg:.*]]: index -// CHECK: %[[indices:.*]] = arith.constant dense<0> : vector -// CHECK: %[[arg_i32:.*]] = arith.index_cast %[[arg]] : index to i32 -// CHECK: %[[bounds:.*]] = llvm.insertelement %[[arg_i32]] -// CHECK: %[[boundsCast:.*]] = builtin.unrealized_conversion_cast %[[bounds]] : vector<1xi32> to vector -// CHECK: %[[result:.*]] = arith.cmpi slt, %[[indices]], %[[boundsCast]] : vector -// CHECK: return %[[result]] : vector +// CHECK-SAME: %[[NUM_ELEMS:.*]]: index +// CHECK: %[[INDICES:.*]] = arith.constant dense<0> : vector +// CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32 +// CHECK: %[[BOUNDS:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]] +// CHECK: %[[BOUNDS_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOUNDS]] : vector<1xi32> to vector +// CHECK: %[[RESULT:.*]] = arith.cmpi slt, %[[INDICES]], %[[BOUNDS_CAST]] : vector +// CHECK: return %[[RESULT]] : vector // ----- -func.func @create_mask_1d(%a : index) -> vector<4xi1> { - %v = vector.create_mask %a : vector<4xi1> +func.func @create_mask_1d(%num_elems : index) -> vector<4xi1> { + %v = vector.create_mask %num_elems : vector<4xi1> return %v: vector<4xi1> } // CHECK-LABEL: func @create_mask_1d -// CHECK-SAME: %[[arg:.*]]: index -// CHECK: %[[indices:.*]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32> -// CHECK: %[[arg_i32:.*]] = arith.index_cast %[[arg]] : index to i32 -// CHECK: %[[boundsInsert:.*]] = llvm.insertelement %[[arg_i32]] -// CHECK: %[[bounds:.*]] = llvm.shufflevector %[[boundsInsert]] -// CHECK: %[[result:.*]] = arith.cmpi slt, %[[indices]], %[[bounds]] : vector<4xi32> -// CHECK: return %[[result]] : vector<4xi1> +// CHECK-SAME: %[[NUM_ELEMS:.*]]: index +// CHECK: %[[INDICES:.*]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32> +// CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32 +// 
CHECK: %[[BOUNDS_INSERT:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]] +// CHECK: %[[BOUNDS:.*]] = llvm.shufflevector %[[BOUNDS_INSERT]] +// CHECK: %[[RESULT:.*]] = arith.cmpi slt, %[[INDICES]], %[[BOUNDS]] : vector<4xi32> +// CHECK: return %[[RESULT]] : vector<4xi1> // ----- -func.func @create_mask_1d_scalable(%a : index) -> vector<[4]xi1> { - %v = vector.create_mask %a : vector<[4]xi1> +func.func @create_mask_1d_scalable(%num_elems : index) -> vector<[4]xi1> { + %v = vector.create_mask %num_elems : vector<[4]xi1> return %v: vector<[4]xi1> } // CHECK-LABEL: func @create_mask_1d_scalable -// CHECK-SAME: %[[arg:.*]]: index -// CHECK: %[[indices:.*]] = llvm.intr.stepvector : vector<[4]xi32> -// CHECK: %[[arg_i32:.*]] = arith.index_cast %[[arg]] : index to i32 -// CHECK: %[[boundsInsert:.*]] = llvm.insertelement %[[arg_i32]], {{.*}} : vector<[4]xi32> -// CHECK: %[[bounds:.*]] = llvm.shufflevector %[[boundsInsert]], {{.*}} : vector<[4]xi32> -// CHECK: %[[result:.*]] = arith.cmpi slt, %[[indices]], %[[bounds]] : vector<[4]xi32> -// CHECK: return %[[result]] : vector<[4]xi1> +// CHECK-SAME: %[[NUM_ELEMS:.*]]: index +// CHECK: %[[INDICES:.*]] = llvm.intr.stepvector : vector<[4]xi32> +// CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32 +// CHECK: %[[BOUNDS_INSERT:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]], {{.*}} : vector<[4]xi32> +// CHECK: %[[BOUNDS:.*]] = llvm.shufflevector %[[BOUNDS_INSERT]], {{.*}} : vector<[4]xi32> +// CHECK: %[[RESULT:.*]] = arith.cmpi slt, %[[INDICES]], %[[BOUNDS]] : vector<[4]xi32> +// CHECK: return %[[RESULT]] : vector<[4]xi1> // ----- +//===----------------------------------------------------------------------===// +// vector.transpose +//===----------------------------------------------------------------------===// + func.func @transpose_0d(%arg0: vector) -> vector { %0 = vector.transpose %arg0, [] : vector to vector return %0 : vector @@ -2911,6 +3149,10 @@ func.func @transpose_0d(%arg0: vector) -> vector { // 
----- +//===----------------------------------------------------------------------===// +// vector.flat_transpose +//===----------------------------------------------------------------------===// + func.func @flat_transpose(%arg0: vector<16xf32>) -> vector<16xf32> { %0 = vector.flat_transpose %arg0 { rows = 4: i32, columns = 4: i32 } : vector<16xf32> -> vector<16xf32> @@ -2942,195 +3184,221 @@ func.func @flat_transpose_index(%arg0: vector<16xindex>) -> vector<16xindex> { // ----- -func.func @vector_load(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<8xf32> { +func.func @flat_transpose(%arg0: vector<16xf32>) -> vector<16xf32> { + %0 = vector.flat_transpose %arg0 { rows = 4: i32, columns = 4: i32 } + : vector<16xf32> -> vector<16xf32> + return %0 : vector<16xf32> +} + +// CHECK-LABEL: func @flat_transpose +// CHECK-SAME: %[[A:.*]]: vector<16xf32> +// CHECK: %[[T:.*]] = llvm.intr.matrix.transpose %[[A]] +// CHECK-SAME: {columns = 4 : i32, rows = 4 : i32} : +// CHECK-SAME: vector<16xf32> into vector<16xf32> +// CHECK: return %[[T]] : vector<16xf32> + +// ----- + +//===----------------------------------------------------------------------===// +// vector.load +//===----------------------------------------------------------------------===// + +func.func @load(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<8xf32> { %0 = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<8xf32> return %0 : vector<8xf32> } -// CHECK-LABEL: func @vector_load -// CHECK: %[[c100:.*]] = llvm.mlir.constant(100 : index) : i64 -// CHECK: %[[mul:.*]] = llvm.mul %{{.*}}, %[[c100]] : i64 -// CHECK: %[[add:.*]] = llvm.add %[[mul]], %{{.*}} : i64 -// CHECK: %[[gep:.*]] = llvm.getelementptr %{{.*}}[%[[add]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 -// CHECK: llvm.load %[[gep]] {alignment = 4 : i64} : !llvm.ptr -> vector<8xf32> +// CHECK-LABEL: func @load +// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64 +// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, 
%[[C100]] : i64 +// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64 +// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 +// CHECK: llvm.load %[[GEP]] {alignment = 4 : i64} : !llvm.ptr -> vector<8xf32> // ----- -func.func @vector_load_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<[8]xf32> { +func.func @load_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<[8]xf32> { %0 = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<[8]xf32> return %0 : vector<[8]xf32> } -// CHECK-LABEL: func @vector_load_scalable -// CHECK: %[[c100:.*]] = llvm.mlir.constant(100 : index) : i64 -// CHECK: %[[mul:.*]] = llvm.mul %{{.*}}, %[[c100]] : i64 -// CHECK: %[[add:.*]] = llvm.add %[[mul]], %{{.*}} : i64 -// CHECK: %[[gep:.*]] = llvm.getelementptr %{{.*}}[%[[add]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 -// CHECK: llvm.load %[[gep]] {alignment = 4 : i64} : !llvm.ptr -> vector<[8]xf32> +// CHECK-LABEL: func @load_scalable +// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64 +// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64 +// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64 +// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 +// CHECK: llvm.load %[[GEP]] {alignment = 4 : i64} : !llvm.ptr -> vector<[8]xf32> // ----- -func.func @vector_load_nontemporal(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<8xf32> { +func.func @load_nontemporal(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<8xf32> { %0 = vector.load %memref[%i, %j] {nontemporal = true} : memref<200x100xf32>, vector<8xf32> return %0 : vector<8xf32> } -// CHECK-LABEL: func @vector_load_nontemporal -// CHECK: %[[c100:.*]] = llvm.mlir.constant(100 : index) : i64 -// CHECK: %[[mul:.*]] = llvm.mul %{{.*}}, %[[c100]] : i64 -// CHECK: %[[add:.*]] = llvm.add %[[mul]], %{{.*}} : i64 -// CHECK: %[[gep:.*]] = llvm.getelementptr 
%{{.*}}[%[[add]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 -// CHECK: llvm.load %[[gep]] {alignment = 4 : i64, nontemporal} : !llvm.ptr -> vector<8xf32> +// CHECK-LABEL: func @load_nontemporal +// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64 +// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64 +// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64 +// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 +// CHECK: llvm.load %[[GEP]] {alignment = 4 : i64, nontemporal} : !llvm.ptr -> vector<8xf32> // ----- -func.func @vector_load_nontemporal_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<[8]xf32> { +func.func @load_nontemporal_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<[8]xf32> { %0 = vector.load %memref[%i, %j] {nontemporal = true} : memref<200x100xf32>, vector<[8]xf32> return %0 : vector<[8]xf32> } -// CHECK-LABEL: func @vector_load_nontemporal_scalable -// CHECK: %[[c100:.*]] = llvm.mlir.constant(100 : index) : i64 -// CHECK: %[[mul:.*]] = llvm.mul %{{.*}}, %[[c100]] : i64 -// CHECK: %[[add:.*]] = llvm.add %[[mul]], %{{.*}} : i64 -// CHECK: %[[gep:.*]] = llvm.getelementptr %{{.*}}[%[[add]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 -// CHECK: llvm.load %[[gep]] {alignment = 4 : i64, nontemporal} : !llvm.ptr -> vector<[8]xf32> +// CHECK-LABEL: func @load_nontemporal_scalable +// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64 +// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64 +// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64 +// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 +// CHECK: llvm.load %[[GEP]] {alignment = 4 : i64, nontemporal} : !llvm.ptr -> vector<[8]xf32> // ----- -func.func @vector_load_index(%memref : memref<200x100xindex>, %i : index, %j : index) -> vector<8xindex> { +func.func @load_index(%memref : memref<200x100xindex>, %i : index, %j : index) -> 
vector<8xindex> { %0 = vector.load %memref[%i, %j] : memref<200x100xindex>, vector<8xindex> return %0 : vector<8xindex> } -// CHECK-LABEL: func @vector_load_index +// CHECK-LABEL: func @load_index // CHECK: %[[T0:.*]] = llvm.load %{{.*}} {alignment = 8 : i64} : !llvm.ptr -> vector<8xi64> // CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<8xi64> to vector<8xindex> // CHECK: return %[[T1]] : vector<8xindex> // ----- -func.func @vector_load_index_scalable(%memref : memref<200x100xindex>, %i : index, %j : index) -> vector<[8]xindex> { +func.func @load_index_scalable(%memref : memref<200x100xindex>, %i : index, %j : index) -> vector<[8]xindex> { %0 = vector.load %memref[%i, %j] : memref<200x100xindex>, vector<[8]xindex> return %0 : vector<[8]xindex> } -// CHECK-LABEL: func @vector_load_index_scalable +// CHECK-LABEL: func @load_index_scalable // CHECK: %[[T0:.*]] = llvm.load %{{.*}} {alignment = 8 : i64} : !llvm.ptr -> vector<[8]xi64> // CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<[8]xi64> to vector<[8]xindex> // CHECK: return %[[T1]] : vector<[8]xindex> // ----- -func.func @vector_load_0d(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector { +func.func @load_0d(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector { %0 = vector.load %memref[%i, %j] : memref<200x100xf32>, vector return %0 : vector } -// CHECK-LABEL: func @vector_load_0d -// CHECK: %[[load:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] -// CHECK: %[[vec:.*]] = llvm.mlir.undef : vector<1xf32> -// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[inserted:.*]] = llvm.insertelement %[[load]], %[[vec]][%[[c0]] : i32] : vector<1xf32> -// CHECK: %[[cast:.*]] = builtin.unrealized_conversion_cast %[[inserted]] : vector<1xf32> to vector -// CHECK: return %[[cast]] : vector +// CHECK-LABEL: func @load_0d +// CHECK: %[[LOAD:.*]] = memref.load %{{.*}}[%{{.*}}, %{{.*}}] +// CHECK: %[[VEC:.*]] = llvm.mlir.undef : vector<1xf32> 
+// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[INSERTED:.*]] = llvm.insertelement %[[LOAD]], %[[VEC]][%[[C0]] : i32] : vector<1xf32> +// CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %[[INSERTED]] : vector<1xf32> to vector +// CHECK: return %[[CAST]] : vector // ----- +//===----------------------------------------------------------------------===// +// vector.store +//===----------------------------------------------------------------------===// -func.func @vector_store(%memref : memref<200x100xf32>, %i : index, %j : index) { +func.func @store(%memref : memref<200x100xf32>, %i : index, %j : index) { %val = arith.constant dense<11.0> : vector<4xf32> vector.store %val, %memref[%i, %j] : memref<200x100xf32>, vector<4xf32> return } -// CHECK-LABEL: func @vector_store -// CHECK: %[[c100:.*]] = llvm.mlir.constant(100 : index) : i64 -// CHECK: %[[mul:.*]] = llvm.mul %{{.*}}, %[[c100]] : i64 -// CHECK: %[[add:.*]] = llvm.add %[[mul]], %{{.*}} : i64 -// CHECK: %[[gep:.*]] = llvm.getelementptr %{{.*}}[%[[add]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 -// CHECK: llvm.store %{{.*}}, %[[gep]] {alignment = 4 : i64} : vector<4xf32>, !llvm.ptr +// CHECK-LABEL: func @store +// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64 +// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64 +// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64 +// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 +// CHECK: llvm.store %{{.*}}, %[[GEP]] {alignment = 4 : i64} : vector<4xf32>, !llvm.ptr // ----- -func.func @vector_store_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) { +func.func @store_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) { %val = arith.constant dense<11.0> : vector<[4]xf32> vector.store %val, %memref[%i, %j] : memref<200x100xf32>, vector<[4]xf32> return } -// CHECK-LABEL: func @vector_store_scalable -// CHECK: %[[c100:.*]] = llvm.mlir.constant(100 : index) 
: i64 -// CHECK: %[[mul:.*]] = llvm.mul %{{.*}}, %[[c100]] : i64 -// CHECK: %[[add:.*]] = llvm.add %[[mul]], %{{.*}} : i64 -// CHECK: %[[gep:.*]] = llvm.getelementptr %{{.*}}[%[[add]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 -// CHECK: llvm.store %{{.*}}, %[[gep]] {alignment = 4 : i64} : vector<[4]xf32>, !llvm.ptr +// CHECK-LABEL: func @store_scalable +// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64 +// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64 +// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64 +// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 +// CHECK: llvm.store %{{.*}}, %[[GEP]] {alignment = 4 : i64} : vector<[4]xf32>, !llvm.ptr // ----- -func.func @vector_store_nontemporal(%memref : memref<200x100xf32>, %i : index, %j : index) { +func.func @store_nontemporal(%memref : memref<200x100xf32>, %i : index, %j : index) { %val = arith.constant dense<11.0> : vector<4xf32> vector.store %val, %memref[%i, %j] {nontemporal = true} : memref<200x100xf32>, vector<4xf32> return } -// CHECK-LABEL: func @vector_store_nontemporal -// CHECK: %[[c100:.*]] = llvm.mlir.constant(100 : index) : i64 -// CHECK: %[[mul:.*]] = llvm.mul %{{.*}}, %[[c100]] : i64 -// CHECK: %[[add:.*]] = llvm.add %[[mul]], %{{.*}} : i64 -// CHECK: %[[gep:.*]] = llvm.getelementptr %{{.*}}[%[[add]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 -// CHECK: llvm.store %{{.*}}, %[[gep]] {alignment = 4 : i64, nontemporal} : vector<4xf32>, !llvm.ptr +// CHECK-LABEL: func @store_nontemporal +// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64 +// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64 +// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64 +// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 +// CHECK: llvm.store %{{.*}}, %[[GEP]] {alignment = 4 : i64, nontemporal} : vector<4xf32>, !llvm.ptr // ----- -func.func @vector_store_nontemporal_scalable(%memref : 
memref<200x100xf32>, %i : index, %j : index) { +func.func @store_nontemporal_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) { %val = arith.constant dense<11.0> : vector<[4]xf32> vector.store %val, %memref[%i, %j] {nontemporal = true} : memref<200x100xf32>, vector<[4]xf32> return } -// CHECK-LABEL: func @vector_store_nontemporal_scalable -// CHECK: %[[c100:.*]] = llvm.mlir.constant(100 : index) : i64 -// CHECK: %[[mul:.*]] = llvm.mul %{{.*}}, %[[c100]] : i64 -// CHECK: %[[add:.*]] = llvm.add %[[mul]], %{{.*}} : i64 -// CHECK: %[[gep:.*]] = llvm.getelementptr %{{.*}}[%[[add]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 -// CHECK: llvm.store %{{.*}}, %[[gep]] {alignment = 4 : i64, nontemporal} : vector<[4]xf32>, !llvm.ptr +// CHECK-LABEL: func @store_nontemporal_scalable +// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64 +// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64 +// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64 +// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 +// CHECK: llvm.store %{{.*}}, %[[GEP]] {alignment = 4 : i64, nontemporal} : vector<[4]xf32>, !llvm.ptr // ----- -func.func @vector_store_index(%memref : memref<200x100xindex>, %i : index, %j : index) { +func.func @store_index(%memref : memref<200x100xindex>, %i : index, %j : index) { %val = arith.constant dense<11> : vector<4xindex> vector.store %val, %memref[%i, %j] : memref<200x100xindex>, vector<4xindex> return } -// CHECK-LABEL: func @vector_store_index +// CHECK-LABEL: func @store_index // CHECK: llvm.store %{{.*}}, %{{.*}} {alignment = 8 : i64} : vector<4xi64>, !llvm.ptr // ----- -func.func @vector_store_index_scalable(%memref : memref<200x100xindex>, %i : index, %j : index) { +func.func @store_index_scalable(%memref : memref<200x100xindex>, %i : index, %j : index) { %val = arith.constant dense<11> : vector<[4]xindex> vector.store %val, %memref[%i, %j] : memref<200x100xindex>, vector<[4]xindex> return } -// 
CHECK-LABEL: func @vector_store_index_scalable +// CHECK-LABEL: func @store_index_scalable // CHECK: llvm.store %{{.*}}, %{{.*}} {alignment = 8 : i64} : vector<[4]xi64>, !llvm.ptr // ----- -func.func @vector_store_0d(%memref : memref<200x100xf32>, %i : index, %j : index) { +func.func @store_0d(%memref : memref<200x100xf32>, %i : index, %j : index) { %val = arith.constant dense<11.0> : vector vector.store %val, %memref[%i, %j] : memref<200x100xf32>, vector return } -// CHECK-LABEL: func @vector_store_0d -// CHECK: %[[val:.*]] = arith.constant dense<1.100000e+01> : vector -// CHECK: %[[cast:.*]] = builtin.unrealized_conversion_cast %[[val]] : vector to vector<1xf32> -// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[extracted:.*]] = llvm.extractelement %[[cast]][%[[c0]] : i64] : vector<1xf32> -// CHECK: memref.store %[[extracted]], %{{.*}}[%{{.*}}, %{{.*}}] +// CHECK-LABEL: func @store_0d +// CHECK: %[[VAL:.*]] = arith.constant dense<1.100000e+01> : vector +// CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %[[VAL]] : vector to vector<1xf32> +// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[EXTRACTED:.*]] = llvm.extractelement %[[CAST]][%[[C0]] : i64] : vector<1xf32> +// CHECK: memref.store %[[EXTRACTED]], %{{.*}}[%{{.*}}, %{{.*}}] // ----- +//===----------------------------------------------------------------------===// +// vector.maskedload +//===----------------------------------------------------------------------===// + func.func @masked_load(%arg0: memref, %arg1: vector<16xi1>, %arg2: vector<16xf32>) -> vector<16xf32> { %c0 = arith.constant 0: index %0 = vector.maskedload %arg0[%c0], %arg1, %arg2 : memref, vector<16xi1>, vector<16xf32> into vector<16xf32> @@ -3181,6 +3449,10 @@ func.func @masked_load_index_scalable(%arg0: memref, %arg1: vector<[16] // ----- +//===----------------------------------------------------------------------===// +// vector.maskedstore 
+//===----------------------------------------------------------------------===// + func.func @masked_store(%arg0: memref, %arg1: vector<16xi1>, %arg2: vector<16xf32>) { %c0 = arith.constant 0: index vector.maskedstore %arg0[%c0], %arg1, %arg2 : memref, vector<16xi1>, vector<16xf32> @@ -3229,6 +3501,10 @@ func.func @masked_store_index_scalable(%arg0: memref, %arg1: vector<[16 // ----- +//===----------------------------------------------------------------------===// +// vector.gather +//===----------------------------------------------------------------------===// + func.func @gather(%arg0: memref, %arg1: vector<3xi32>, %arg2: vector<3xi1>, %arg3: vector<3xf32>) -> vector<3xf32> { %0 = arith.constant 0: index %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref, vector<3xi32>, vector<3xi1>, vector<3xf32> into vector<3xf32> @@ -3440,6 +3716,10 @@ func.func @gather_1d_from_2d_scalable(%arg0: memref<4x?xf32>, %arg1: vector<[4]x // ----- +//===----------------------------------------------------------------------===// +// vector.scatter +//===----------------------------------------------------------------------===// + func.func @scatter(%arg0: memref, %arg1: vector<3xi32>, %arg2: vector<3xi1>, %arg3: vector<3xf32>) { %0 = arith.constant 0: index vector.scatter %arg0[%0][%arg1], %arg2, %arg3 : memref, vector<3xi32>, vector<3xi1>, vector<3xf32> @@ -3514,6 +3794,10 @@ func.func @scatter_1d_into_2d_scalable(%arg0: memref<4x?xf32>, %arg1: vector<[4] // ----- +//===----------------------------------------------------------------------===// +// vector.expandload +//===----------------------------------------------------------------------===// + func.func @expand_load_op(%arg0: memref, %arg1: vector<11xi1>, %arg2: vector<11xf32>) -> vector<11xf32> { %c0 = arith.constant 0: index %0 = vector.expandload %arg0[%c0], %arg1, %arg2 : memref, vector<11xi1>, vector<11xf32> into vector<11xf32> @@ -3539,6 +3823,10 @@ func.func @expand_load_op_index(%arg0: memref, %arg1: 
vector<11xi1>, %a // ----- +//===----------------------------------------------------------------------===// +// vector.compressstore +//===----------------------------------------------------------------------===// + func.func @compress_store_op(%arg0: memref, %arg1: vector<11xi1>, %arg2: vector<11xf32>) { %c0 = arith.constant 0: index vector.compressstore %arg0[%c0], %arg1, %arg2 : memref, vector<11xi1>, vector<11xf32> @@ -3563,57 +3851,65 @@ func.func @compress_store_op_index(%arg0: memref, %arg1: vector<11xi1>, // ----- +//===----------------------------------------------------------------------===// +// vector.splat +//===----------------------------------------------------------------------===// + // CHECK-LABEL: @splat_0d -// CHECK-SAME: %[[ARG:.*]]: f32 -func.func @splat_0d(%a: f32) -> vector { - %v = vector.splat %a : vector +// CHECK-SAME: %[[ELT:.*]]: f32 +func.func @splat_0d(%elt: f32) -> vector { + %v = vector.splat %elt : vector return %v : vector } // CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : vector<1xf32> // CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ARG]], %[[UNDEF]][%[[ZERO]] : i32] : vector<1xf32> +// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : i32] : vector<1xf32> // CHECK-NEXT: %[[VCAST:[0-9]+]] = builtin.unrealized_conversion_cast %[[V]] : vector<1xf32> to vector // CHECK-NEXT: return %[[VCAST]] : vector // ----- // CHECK-LABEL: @splat -// CHECK-SAME: %[[A:arg[0-9]+]]: vector<4xf32> -// CHECK-SAME: %[[ELT:arg[0-9]+]]: f32 -func.func @splat(%a: vector<4xf32>, %b: f32) -> vector<4xf32> { - %vb = vector.splat %b : vector<4xf32> - %r = arith.mulf %a, %vb : vector<4xf32> +// CHECK-SAME: %[[VEC:[0-9a-zA-Z]+]]: vector<4xf32> +// CHECK-SAME: %[[ELT:[0-9a-zA-Z]+]]: f32 +func.func @splat(%vec: vector<4xf32>, %elt: f32) -> vector<4xf32> { + %vb = vector.splat %elt : vector<4xf32> + %r = arith.mulf %vec, %vb : vector<4xf32> return %r : 
vector<4xf32> } // CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : vector<4xf32> // CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : i32 // CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : i32] : vector<4xf32> // CHECK-NEXT: %[[SPLAT:[0-9]+]] = llvm.shufflevector %[[V]], %[[UNDEF]] [0, 0, 0, 0] -// CHECK-NEXT: %[[SCALE:[0-9]+]] = arith.mulf %[[A]], %[[SPLAT]] : vector<4xf32> +// CHECK-NEXT: %[[SCALE:[0-9]+]] = arith.mulf %[[VEC]], %[[SPLAT]] : vector<4xf32> // CHECK-NEXT: return %[[SCALE]] : vector<4xf32> // ----- // CHECK-LABEL: @splat_scalable -// CHECK-SAME: %[[A:arg[0-9]+]]: vector<[4]xf32> -// CHECK-SAME: %[[ELT:arg[0-9]+]]: f32 -func.func @splat_scalable(%a: vector<[4]xf32>, %b: f32) -> vector<[4]xf32> { - %vb = vector.splat %b : vector<[4]xf32> - %r = arith.mulf %a, %vb : vector<[4]xf32> +// CHECK-SAME: %[[VEC:[0-9a-zA-Z]+]]: vector<[4]xf32> +// CHECK-SAME: %[[ELT:[0-9a-zA-Z]+]]: f32 +func.func @splat_scalable(%vec: vector<[4]xf32>, %elt: f32) -> vector<[4]xf32> { + %vb = vector.splat %elt : vector<[4]xf32> + %r = arith.mulf %vec, %vb : vector<[4]xf32> return %r : vector<[4]xf32> } // CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : vector<[4]xf32> // CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : i32 // CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : i32] : vector<[4]xf32> // CHECK-NEXT: %[[SPLAT:[0-9]+]] = llvm.shufflevector %[[V]], %[[UNDEF]] [0, 0, 0, 0] -// CHECK-NEXT: %[[SCALE:[0-9]+]] = arith.mulf %[[A]], %[[SPLAT]] : vector<[4]xf32> +// CHECK-NEXT: %[[SCALE:[0-9]+]] = arith.mulf %[[VEC]], %[[SPLAT]] : vector<[4]xf32> // CHECK-NEXT: return %[[SCALE]] : vector<[4]xf32> // ----- -// CHECK-LABEL: @vector_scalable_insert +//===----------------------------------------------------------------------===// +// vector.scalable.insert +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @scalable_insert // CHECK-SAME: 
%[[SUB:.*]]: vector<4xf32>, %[[SV:.*]]: vector<[4]xf32> -func.func @vector_scalable_insert(%sub: vector<4xf32>, %dsv: vector<[4]xf32>) -> vector<[4]xf32> { +func.func @scalable_insert(%sub: vector<4xf32>, %dsv: vector<[4]xf32>) -> vector<[4]xf32> { // CHECK-NEXT: %[[TMP:.*]] = llvm.intr.vector.insert %[[SUB]], %[[SV]][0] : vector<4xf32> into vector<[4]xf32> %0 = vector.scalable.insert %sub, %dsv[0] : vector<4xf32> into vector<[4]xf32> // CHECK-NEXT: llvm.intr.vector.insert %[[SUB]], %[[TMP]][4] : vector<4xf32> into vector<[4]xf32> @@ -3623,9 +3919,13 @@ func.func @vector_scalable_insert(%sub: vector<4xf32>, %dsv: vector<[4]xf32>) -> // ----- -// CHECK-LABEL: @vector_scalable_extract +//===----------------------------------------------------------------------===// +// vector.scalable.extract +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @scalable_extract // CHECK-SAME: %[[VEC:.*]]: vector<[4]xf32> -func.func @vector_scalable_extract(%vec: vector<[4]xf32>) -> vector<8xf32> { +func.func @scalable_extract(%vec: vector<[4]xf32>) -> vector<8xf32> { // CHECK-NEXT: %{{.*}} = llvm.intr.vector.extract %[[VEC]][0] : vector<8xf32> from vector<[4]xf32> %0 = vector.scalable.extract %vec[0] : vector<8xf32> from vector<[4]xf32> return %0 : vector<8xf32> @@ -3633,9 +3933,13 @@ func.func @vector_scalable_extract(%vec: vector<[4]xf32>) -> vector<8xf32> { // ----- -// CHECK-LABEL: @vector_interleave_0d +//===----------------------------------------------------------------------===// +// vector.interleave +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @interleave_0d // CHECK-SAME: %[[LHS:.*]]: vector, %[[RHS:.*]]: vector) -func.func @vector_interleave_0d(%a: vector, %b: vector) -> vector<2xi8> { +func.func @interleave_0d(%a: vector, %b: vector) -> vector<2xi8> { // CHECK-DAG: %[[LHS_RANK1:.*]] = builtin.unrealized_conversion_cast %[[LHS]] : vector to vector<1xi8> // CHECK-DAG: 
%[[RHS_RANK1:.*]] = builtin.unrealized_conversion_cast %[[RHS]] : vector to vector<1xi8> // CHECK: %[[ZIP:.*]] = llvm.shufflevector %[[LHS_RANK1]], %[[RHS_RANK1]] [0, 1] : vector<1xi8> @@ -3646,9 +3950,9 @@ func.func @vector_interleave_0d(%a: vector, %b: vector) -> vector<2xi8> // ----- -// CHECK-LABEL: @vector_interleave_1d +// CHECK-LABEL: @interleave_1d // CHECK-SAME: %[[LHS:.*]]: vector<8xf32>, %[[RHS:.*]]: vector<8xf32>) -func.func @vector_interleave_1d(%a: vector<8xf32>, %b: vector<8xf32>) -> vector<16xf32> { +func.func @interleave_1d(%a: vector<8xf32>, %b: vector<8xf32>) -> vector<16xf32> { // CHECK: %[[ZIP:.*]] = llvm.shufflevector %[[LHS]], %[[RHS]] [0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15] : vector<8xf32> // CHECK: return %[[ZIP]] %0 = vector.interleave %a, %b : vector<8xf32> -> vector<16xf32> @@ -3657,9 +3961,9 @@ func.func @vector_interleave_1d(%a: vector<8xf32>, %b: vector<8xf32>) -> vector< // ----- -// CHECK-LABEL: @vector_interleave_1d_scalable +// CHECK-LABEL: @interleave_1d_scalable // CHECK-SAME: %[[LHS:.*]]: vector<[4]xi32>, %[[RHS:.*]]: vector<[4]xi32>) -func.func @vector_interleave_1d_scalable(%a: vector<[4]xi32>, %b: vector<[4]xi32>) -> vector<[8]xi32> { +func.func @interleave_1d_scalable(%a: vector<[4]xi32>, %b: vector<[4]xi32>) -> vector<[8]xi32> { // CHECK: %[[ZIP:.*]] = "llvm.intr.vector.interleave2"(%[[LHS]], %[[RHS]]) : (vector<[4]xi32>, vector<[4]xi32>) -> vector<[8]xi32> // CHECK: return %[[ZIP]] %0 = vector.interleave %a, %b : vector<[4]xi32> -> vector<[8]xi32> @@ -3668,9 +3972,9 @@ func.func @vector_interleave_1d_scalable(%a: vector<[4]xi32>, %b: vector<[4]xi32 // ----- -// CHECK-LABEL: @vector_interleave_2d +// CHECK-LABEL: @interleave_2d // CHECK-SAME: %[[LHS:.*]]: vector<2x3xi8>, %[[RHS:.*]]: vector<2x3xi8>) -func.func @vector_interleave_2d(%a: vector<2x3xi8>, %b: vector<2x3xi8>) -> vector<2x6xi8> { +func.func @interleave_2d(%a: vector<2x3xi8>, %b: vector<2x3xi8>) -> vector<2x6xi8> { // CHECK: llvm.shufflevector // 
CHECK-NOT: vector.interleave {{.*}} : vector<2x3xi8> %0 = vector.interleave %a, %b : vector<2x3xi8> -> vector<2x6xi8> @@ -3679,9 +3983,9 @@ func.func @vector_interleave_2d(%a: vector<2x3xi8>, %b: vector<2x3xi8>) -> vecto // ----- -// CHECK-LABEL: @vector_interleave_2d_scalable +// CHECK-LABEL: @interleave_2d_scalable // CHECK-SAME: %[[LHS:.*]]: vector<2x[8]xi16>, %[[RHS:.*]]: vector<2x[8]xi16>) -func.func @vector_interleave_2d_scalable(%a: vector<2x[8]xi16>, %b: vector<2x[8]xi16>) -> vector<2x[16]xi16> { +func.func @interleave_2d_scalable(%a: vector<2x[8]xi16>, %b: vector<2x[8]xi16>) -> vector<2x[16]xi16> { // CHECK: llvm.intr.vector.interleave2 // CHECK-NOT: vector.interleave {{.*}} : vector<2x[8]xi16> %0 = vector.interleave %a, %b : vector<2x[8]xi16> -> vector<2x[16]xi16> @@ -3690,90 +3994,108 @@ func.func @vector_interleave_2d_scalable(%a: vector<2x[8]xi16>, %b: vector<2x[8] // ----- -// CHECK-LABEL: @vector_deinterleave_1d -// CHECK-SAME: (%[[SRC:.*]]: vector<4xi32>) -> (vector<2xi32>, vector<2xi32>) -func.func @vector_deinterleave_1d(%a: vector<4xi32>) -> (vector<2xi32>, vector<2xi32>) { +//===----------------------------------------------------------------------===// +// vector.deinterleave +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @deinterleave_1d +// CHECK-SAME: (%[[ARG:.*]]: vector<4xi32>) -> (vector<2xi32>, vector<2xi32>) +func.func @deinterleave_1d(%arg: vector<4xi32>) -> (vector<2xi32>, vector<2xi32>) { // CHECK: %[[POISON:.*]] = llvm.mlir.poison : vector<4xi32> - // CHECK: llvm.shufflevector %[[SRC]], %[[POISON]] [0, 2] : vector<4xi32> - // CHECK: llvm.shufflevector %[[SRC]], %[[POISON]] [1, 3] : vector<4xi32> - %0, %1 = vector.deinterleave %a : vector<4xi32> -> vector<2xi32> + // CHECK: llvm.shufflevector %[[ARG]], %[[POISON]] [0, 2] : vector<4xi32> + // CHECK: llvm.shufflevector %[[ARG]], %[[POISON]] [1, 3] : vector<4xi32> + %0, %1 = vector.deinterleave %arg : vector<4xi32> -> vector<2xi32> 
return %0, %1 : vector<2xi32>, vector<2xi32> } -// CHECK-LABEL: @vector_deinterleave_1d_scalable -// CHECK-SAME: %[[SRC:.*]]: vector<[4]xi32>) -> (vector<[2]xi32>, vector<[2]xi32>) -func.func @vector_deinterleave_1d_scalable(%a: vector<[4]xi32>) -> (vector<[2]xi32>, vector<[2]xi32>) { - // CHECK: %[[RES:.*]] = "llvm.intr.vector.deinterleave2"(%[[SRC]]) : (vector<[4]xi32>) -> !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)> +// ----- + +// CHECK-LABEL: @deinterleave_1d_scalable +// CHECK-SAME: %[[ARG:.*]]: vector<[4]xi32>) -> (vector<[2]xi32>, vector<[2]xi32>) +func.func @deinterleave_1d_scalable(%arg: vector<[4]xi32>) -> (vector<[2]xi32>, vector<[2]xi32>) { + // CHECK: %[[RES:.*]] = "llvm.intr.vector.deinterleave2"(%[[ARG]]) : (vector<[4]xi32>) -> !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)> // CHECK: llvm.extractvalue %[[RES]][0] : !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)> // CHECK: llvm.extractvalue %[[RES]][1] : !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)> - %0, %1 = vector.deinterleave %a : vector<[4]xi32> -> vector<[2]xi32> + %0, %1 = vector.deinterleave %arg : vector<[4]xi32> -> vector<[2]xi32> return %0, %1 : vector<[2]xi32>, vector<[2]xi32> } -// CHECK-LABEL: @vector_deinterleave_2d -// CHECK-SAME: %[[SRC:.*]]: vector<2x8xf32>) -> (vector<2x4xf32>, vector<2x4xf32>) -func.func @vector_deinterleave_2d(%a: vector<2x8xf32>) -> (vector<2x4xf32>, vector<2x4xf32>) { +// ----- + +// CHECK-LABEL: @deinterleave_2d +// CHECK-SAME: %[[ARG:.*]]: vector<2x8xf32>) -> (vector<2x4xf32>, vector<2x4xf32>) +func.func @deinterleave_2d(%arg: vector<2x8xf32>) -> (vector<2x4xf32>, vector<2x4xf32>) { // CHECK: llvm.shufflevector // CHECK-NOT: vector.deinterleave %{{.*}} : vector<2x8xf32> - %0, %1 = vector.deinterleave %a : vector<2x8xf32> -> vector<2x4xf32> + %0, %1 = vector.deinterleave %arg : vector<2x8xf32> -> vector<2x4xf32> return %0, %1 : vector<2x4xf32>, vector<2x4xf32> } -func.func @vector_deinterleave_2d_scalable(%a: vector<2x[8]xf32>) -> (vector<2x[4]xf32>, 
vector<2x[4]xf32>) { +// ----- + +func.func @deinterleave_2d_scalable(%arg: vector<2x[8]xf32>) -> (vector<2x[4]xf32>, vector<2x[4]xf32>) { // CHECK: llvm.intr.vector.deinterleave2 // CHECK-NOT: vector.deinterleave %{{.*}} : vector<2x[8]xf32> - %0, %1 = vector.deinterleave %a : vector<2x[8]xf32> -> vector<2x[4]xf32> + %0, %1 = vector.deinterleave %arg : vector<2x[8]xf32> -> vector<2x[4]xf32> return %0, %1 : vector<2x[4]xf32>, vector<2x[4]xf32> } // ----- -// CHECK-LABEL: func.func @vector_from_elements_1d( -// CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: f32) -// CHECK: %[[undef:.*]] = llvm.mlir.undef : vector<3xf32> -// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[insert0:.*]] = llvm.insertelement %[[a]], %[[undef]][%[[c0]] : i64] : vector<3xf32> -// CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[insert1:.*]] = llvm.insertelement %[[b]], %[[insert0]][%[[c1]] : i64] : vector<3xf32> -// CHECK: %[[c2:.*]] = llvm.mlir.constant(2 : i64) : i64 -// CHECK: %[[insert2:.*]] = llvm.insertelement %[[a]], %[[insert1]][%[[c2]] : i64] : vector<3xf32> -// CHECK: return %[[insert2]] -func.func @vector_from_elements_1d(%a: f32, %b: f32) -> vector<3xf32> { - %0 = vector.from_elements %a, %b, %a : vector<3xf32> +//===----------------------------------------------------------------------===// +// vector.from_elements +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: func.func @from_elements_1d( +// CHECK-SAME: %[[ARG_0:.*]]: f32, %[[ARG_1:.*]]: f32) +// CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : vector<3xf32> +// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[INSERT0:.*]] = llvm.insertelement %[[ARG_0]], %[[UNDEF]][%[[C0]] : i64] : vector<3xf32> +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[INSERT1:.*]] = llvm.insertelement %[[ARG_1]], %[[INSERT0]][%[[C1]] : i64] : vector<3xf32> +// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64 +// CHECK: 
%[[INSERT2:.*]] = llvm.insertelement %[[ARG_0]], %[[INSERT1]][%[[C2]] : i64] : vector<3xf32> +// CHECK: return %[[INSERT2]] +func.func @from_elements_1d(%arg0: f32, %arg1: f32) -> vector<3xf32> { + %0 = vector.from_elements %arg0, %arg1, %arg0 : vector<3xf32> return %0 : vector<3xf32> } // ----- -// CHECK-LABEL: func.func @vector_from_elements_0d( -// CHECK-SAME: %[[a:.*]]: f32) -// CHECK: %[[undef:.*]] = llvm.mlir.undef : vector<1xf32> -// CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[insert0:.*]] = llvm.insertelement %[[a]], %[[undef]][%[[c0]] : i64] : vector<1xf32> -// CHECK: %[[cast:.*]] = builtin.unrealized_conversion_cast %[[insert0]] : vector<1xf32> to vector<f32> -// CHECK: return %[[cast]] -func.func @vector_from_elements_0d(%a: f32) -> vector<f32> { - %0 = vector.from_elements %a : vector<f32> +// CHECK-LABEL: func.func @from_elements_0d( +// CHECK-SAME: %[[ARG_0:.*]]: f32) +// CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : vector<1xf32> +// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[INSERT0:.*]] = llvm.insertelement %[[ARG_0]], %[[UNDEF]][%[[C0]] : i64] : vector<1xf32> +// CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %[[INSERT0]] : vector<1xf32> to vector<f32> +// CHECK: return %[[CAST]] +func.func @from_elements_0d(%arg0: f32) -> vector<f32> { + %0 = vector.from_elements %arg0 : vector<f32> return %0 : vector<f32> } // ----- -// CHECK-LABEL: @vector_step_scalable -// CHECK: %[[STEPVECTOR:.*]] = llvm.intr.stepvector : vector<[4]xi64> -// CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %[[STEPVECTOR]] : vector<[4]xi64> to vector<[4]xindex> -// CHECK: return %[[CAST]] : vector<[4]xindex> -func.func @vector_step_scalable() -> vector<[4]xindex> { - %0 = vector.step : vector<[4]xindex> - return %0 : vector<[4]xindex> -} - -// ----- +//===----------------------------------------------------------------------===// +// vector.step +//===----------------------------------------------------------------------===// -// CHECK-LABEL: 
@vector_step +// CHECK-LABEL: @step // CHECK: %[[CST:.+]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex> // CHECK: return %[[CST]] : vector<4xindex> -func.func @vector_step() -> vector<4xindex> { +func.func @step() -> vector<4xindex> { %0 = vector.step : vector<4xindex> return %0 : vector<4xindex> } + +// ----- + +// CHECK-LABEL: @step_scalable +// CHECK: %[[STEPVECTOR:.*]] = llvm.intr.stepvector : vector<[4]xi64> +// CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %[[STEPVECTOR]] : vector<[4]xi64> to vector<[4]xindex> +// CHECK: return %[[CAST]] : vector<[4]xindex> +func.func @step_scalable() -> vector<[4]xindex> { + %0 = vector.step : vector<[4]xindex> + return %0 : vector<[4]xindex> +}