
Commit 6086781

Merge branch 'main' into fix_docs
2 parents: 1370ea8 + 1e97232

27 files changed: +849, -285 lines
backends/arm/scripts/parse_test_names.py

Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
# Copyright 2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from executorch.exir.dialects.edge.spec.utils import SAMPLE_INPUT

# Add edge ops which we lower but which are not included in exir/dialects/edge/edge.yaml here.
CUSTOM_EDGE_OPS = ["linspace.default", "eye.default"]
ALL_EDGE_OPS = SAMPLE_INPUT.keys() | CUSTOM_EDGE_OPS

# Add all targets and TOSA profiles we support here.
TARGETS = {"tosa_BI", "tosa_MI", "u55_BI", "u85_BI"}


def get_edge_ops():
    """
    Returns a set of edge op names, normalized to the form used in unit test names:
    1. Names are lowercase.
    2. The overload is dropped if it is 'default'; otherwise it is appended after an underscore.
    3. Overly verbose names are shortened by removing certain prefixes/suffixes.

    Examples:
        abs.default -> abs
        split_copy.Tensor -> split_tensor
    """
    edge_ops = set()
    for edge_name in ALL_EDGE_OPS:
        op, overload = edge_name.split(".")

        # Normalize names
        op = op.lower()
        op = op.removeprefix("_")
        op = op.removesuffix("_copy")
        op = op.removesuffix("_with_indices")
        op = op.removesuffix("_no_training")
        overload = overload.lower()

        if overload == "default":
            edge_ops.add(op)
        else:
            edge_ops.add(f"{op}_{overload}")

    return edge_ops


def parse_test_name(test_name: str, edge_ops: set[str]) -> tuple[str, str, bool]:
    """
    Parses a test name of the form
        test_OP_TARGET_<not_delegated>_<any_other_info>
    where OP must match a string in edge_ops and TARGET must match one string in TARGETS.
    The "not_delegated" suffix indicates that the test checks that the op is not delegated.

    Examples of valid names: "test_mm_u55_BI_not_delegated" or "test_add_scalar_tosa_MI_two_inputs".

    Returns a tuple (OP, TARGET, IS_DELEGATED) if valid.
    """
    test_name = test_name.removeprefix("test_")
    is_delegated = "not_delegated" not in test_name
    assert (
        "reject" not in test_name
    ), f"Use 'not_delegated' instead of 'reject' in {test_name}"

    op = "None"
    target = "None"
    for potential_target in TARGETS:
        index = test_name.find(potential_target)
        if index != -1:
            op = test_name[: index - 1]
            target = potential_target
            break
    # Special case for convolution
    op = op.removesuffix("_1d")
    op = op.removesuffix("_2d")

    assert target != "None", f"{test_name} does not contain one of {TARGETS}"
    assert (
        op in edge_ops
    ), f"Parsed invalid OP from {test_name}: {op} does not exist in edge.yaml or CUSTOM_EDGE_OPS"

    return op, target, is_delegated


if __name__ == "__main__":
    """Parses a list of test names given on the command line."""
    import sys

    sys.tracebacklimit = 0  # Do not print a stack trace

    edge_ops = get_edge_ops()
    exit_code = 0

    for test_name in sys.argv[1:]:
        try:
            assert test_name[:5] == "test_", f"Unexpected input: {test_name}"
            parse_test_name(test_name, edge_ops)
        except AssertionError as e:
            print(e)
            exit_code = 1
        else:
            print(f"{test_name} OK")

    sys.exit(exit_code)
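
For a quick sanity check of the parser, a minimal sketch of calling it directly (this assumes mm.default and add.Scalar are present in SAMPLE_INPUT, which the docstring examples suggest):

edge_ops = get_edge_ops()

print(parse_test_name("test_mm_u55_BI_not_delegated", edge_ops))
# -> ("mm", "u55_BI", False)

print(parse_test_name("test_add_scalar_tosa_MI_two_inputs", edge_ops))
# -> ("add_scalar", "tosa_MI", True)

# Invalid names raise AssertionError, e.g.:
# parse_test_name("test_mm_reject_u55_BI", edge_ops)
# AssertionError: Use 'not_delegated' instead of 'reject' in mm_reject_u55_BI

The pre-push hook below invokes this script over all test function names found in changed op test files.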

backends/arm/scripts/pre-push

Lines changed: 38 additions & 0 deletions
@@ -166,6 +166,44 @@ for COMMIT in ${COMMITS}; do
     fi
   fi

+  # Op test checks
+  op_test_files=$(echo $commit_files | grep -oE 'backends/arm/test/ops/\S+')
+  if [ "$op_test_files" ]; then
+
+    # TODO: These checks can be removed when all unittests are refactored.
+    if grep -icq "SkipIfNoCorstone" $op_test_files; then
+      echo -e "${ERROR} @SkipIfNoCorstone300/320 is deprecated;"\
+        "please use XfailIfNoCorstone300/320 instead." >&2
+      FAILED=1
+    fi
+
+    if grep -icq "conftest.expectedFailureOnFVP" $op_test_files; then
+      echo -e "${ERROR} @conftest.expectedFailureOnFVP is deprecated;"\
+        "please use XfailIfCorstone300/320 instead." >&2
+      FAILED=1
+    fi
+
+    if grep -icq "unittest.TestCase" $op_test_files; then
+      echo -e "${ERROR} Use of the unittest test framework is deprecated;"\
+        "please use pytest instead." >&2
+      FAILED=1
+    fi
+
+    if grep -icq "on_fvp(" $op_test_files; then
+      echo -e "${ERROR} All unittests should run on FVP if relevant;"\
+        "the on_fvp suffix can be omitted." >&2
+      FAILED=1
+    fi
+
+    # Check that the tested op and target are parsed correctly from the test name
+    test_names=$(grep -h "def test_" $op_test_files | cut -d"(" -f1 | cut -d" " -f2)
+    python ./backends/arm/scripts/parse_test_names.py $test_names
+    if [ $? -ne 0 ]; then
+      echo -e "${ERROR} Failed op test name check." >&2
+      FAILED=1
+    fi
+  fi
+
   echo "" # Newline to visually separate commit processing
 done
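
The test-name extraction in the hook is plain text processing; a rough Python equivalent of the grep/cut chain, with a made-up file snippet for illustration:

# Mirrors: grep -h "def test_" $op_test_files | cut -d"(" -f1 | cut -d" " -f2
source = '''
def test_mm_u55_BI_not_delegated(test_data):
    ...
'''
test_names = [
    line.split("(")[0].split()[1]
    for line in source.splitlines()
    if "def test_" in line
]
print(test_names)  # ['test_mm_u55_BI_not_delegated']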

backends/cadence/aot/ops_registrations.py

Lines changed: 9 additions & 9 deletions
@@ -293,6 +293,15 @@
     "attention_mask.out(Tensor input, Tensor start, Tensor stop, *, Tensor(a!) out) -> Tensor(a!)"
 )

+# Custom ops in the aten namespace. RMSNorm is usually decomposed, so having
+# an out-variant is non-standard.
+
+lib_aten = Library("aten", "FRAGMENT")
+
+lib_aten.define(
+    "rms_norm.out(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, float? eps=None, *, Tensor(a!) out) -> Tensor(a!)"
+)
+

 @register_fake("cadence::quantize_per_tensor")
 def quantize_per_tensor_meta(
@@ -619,15 +628,6 @@ def linalg_vector_norm_meta(
     return X.new_empty([], dtype=X.dtype)


-@register_fake("cadence::rms_norm")
-def rms_norm_meta(
-    X: torch.Tensor,
-    eps: float,
-    weight: torch.Tensor,
-) -> torch.Tensor:
-    return X.new_empty(X.shape, dtype=X.dtype)
-
-
 @register_fake("cadence::requantize")
 def requantize_meta(
     input: torch.Tensor,
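
For readers unfamiliar with the Library mechanism used above: defining a schema registers the op signature, and implementations are attached separately. A minimal self-contained sketch with a hypothetical op my_rms_norm in a hypothetical myns namespace (the diff instead extends the existing aten namespace via "FRAGMENT"):

import torch
from torch.library import Library

# "DEF" creates a new namespace; "FRAGMENT" (as in the diff) extends an existing one.
lib = Library("myns", "DEF")
lib.define("my_rms_norm(Tensor input, float eps, Tensor weight) -> Tensor")

def my_rms_norm_impl(input, eps, weight):
    # Reference RMSNorm: input / sqrt(mean(input^2) + eps), scaled by weight
    inv_rms = input.pow(2).mean(dim=-1, keepdim=True).add(eps).rsqrt()
    return input * inv_rms * weight

lib.impl("my_rms_norm", my_rms_norm_impl, "CompositeExplicitAutograd")

x = torch.randn(2, 8)
print(torch.ops.myns.my_rms_norm(x, 1e-6, torch.ones(8)).shape)  # torch.Size([2, 8])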

backends/vulkan/_passes/int4_weight_only_quantizer.py

Lines changed: 1 addition & 13 deletions
@@ -118,9 +118,6 @@ def _vk_replace_linear_int4(
     # Use custom vulkan linear layer as default
     linear_class: Type[torch.nn.Module] = VkWeightOnlyInt4Linear,
     copy_weights: bool = False,
-    # Serves the same purpose as `tensor_dim_limit` in
-    # executorch.backends.vulkan.partitioner.VulkanSupportedOperators
-    feature_limit: int = 16384,
 ):
     for name, child in module.named_children():
         if isinstance(child, torch.nn.Linear) and (
@@ -131,8 +128,6 @@
             if (
                 _check_linear_int4_k(child.in_features, groupsize, inner_k_tiles)
                 or padding_allowed
-            ) and (
-                child.out_features < feature_limit and child.in_features < feature_limit
             ):
                 new_linear = linear_class(
                     child.in_features,
@@ -175,7 +170,6 @@ def __init__(
         inner_k_tiles: Optional[int] = 8,
         device: torch.device = torch.device("cpu"),  # noqa
         precision: torch.dtype = torch.float32,
-        feature_limit: int = 16384,
     ) -> None:
         super().__init__()
         assert inner_k_tiles in [2, 4, 8]
@@ -186,9 +180,6 @@
         self.padding_allowed: bool = padding_allowed
         self.device: torch.device = device
         self.precision: torch.dtype = precision
-        # Serves the same purpose as `tensor_dim_limit` in
-        # executorch.backends.vulkan.partitioner.VulkanSupportedOperators
-        self.feature_limit = feature_limit

     @torch.no_grad()
     def _create_quantized_state_dict(
@@ -197,10 +188,7 @@ def _create_quantized_state_dict(
         cur_state_dict = model.state_dict()
         for fqn, mod in model.named_modules():
-            # Add additional check to make sure features do not exceed feature limit
-            if isinstance(mod, torch.nn.Linear) and (
-                mod.out_features < self.feature_limit
-                and mod.in_features < self.feature_limit
-            ):
+            if isinstance(mod, torch.nn.Linear):
                 out_features = mod.out_features
                 in_features = mod.in_features
                 logging.info(f"linear: {fqn}, in={in_features}, out={out_features}")
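
With the feature_limit gate removed, replacement hinges only on the isinstance and int4-compatibility checks. The traversal itself is the usual PyTorch module-swapping pattern; a generic sketch under that assumption (the make_linear factory is hypothetical, standing in for VkWeightOnlyInt4Linear construction):

import torch

def replace_linear_layers(module: torch.nn.Module, make_linear) -> None:
    # Walk direct children, swap qualifying Linear layers in place,
    # and recurse into all other submodules.
    for name, child in module.named_children():
        if isinstance(child, torch.nn.Linear):
            setattr(module, name, make_linear(child.in_features, child.out_features))
        else:
            replace_linear_layers(child, make_linear)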

backends/vulkan/_passes/squeeze_unsqueeze_inputs.py

Lines changed: 18 additions & 5 deletions
@@ -27,25 +27,38 @@ class SqueezeUnsqueezeInputs(ExportPass):
         exir_ops.edge.aten.gelu.default,
     }

+    def should_squeeze(self, op, shape: List[int]) -> bool:  # pyre-ignore
+        if len(shape) == 3:
+            return shape[1] == 1 and shape[0] > 1
+        if len(shape) == 4:
+            # No need to squeeze if all dims are 1 except the width dim
+            if all(dim == 1 for dim in shape[:-1]):
+                return False
+            # Otherwise, check for a squeezable dim
+            return 1 in shape[:-1]
+
+        # Prefer not to introduce additional orchestration ops by default
+        return False
+
     def call_operator(
         self,
         op,  # pyre-ignore
         args: Tuple[Argument, ...],
         kwargs: Dict[str, Argument],
         meta: NodeMetadata,
     ) -> ProxyValue:
-        def _squeezable(shape: List[int]) -> bool:
-            return len(shape) > 2 and 1 in shape
-
         if op not in self._squeezable_ops:
             return super().call_operator(op, args, kwargs, meta)
-
         # pyre-ignore[16]: `None` has no attribute `node`
         input_shape = args[0].node.meta["val"].shape
         output_shape = meta["val"].shape
-        if not _squeezable(input_shape):
+
+        if not self.should_squeeze(op, input_shape):
             return super().call_operator(op, args, kwargs, meta)

+        def _squeezable(shape: List[int]) -> bool:
+            return len(shape) > 2 and 1 in shape
+
         # squeeze input tensor
         squeeze_shape = list(input_shape)
         while _squeezable(squeeze_shape):
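
To make the new rule concrete, a standalone restatement of should_squeeze with example shapes (written as a free function purely for illustration; in the pass it is a method that also receives the op):

from typing import List

def should_squeeze(shape: List[int]) -> bool:
    if len(shape) == 3:
        return shape[1] == 1 and shape[0] > 1
    if len(shape) == 4:
        if all(dim == 1 for dim in shape[:-1]):
            return False  # already effectively 2D; squeezing buys nothing
        return 1 in shape[:-1]  # squeeze only if a non-width dim is 1
    return False  # prefer not to introduce extra orchestration ops

print(should_squeeze([4, 1, 32]))     # True
print(should_squeeze([1, 1, 1, 32]))  # False
print(should_squeeze([2, 1, 8, 32]))  # True
print(should_squeeze([2, 3]))         # False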

backends/vulkan/op_registry.py

Lines changed: 2 additions & 0 deletions
@@ -393,6 +393,7 @@ def register_int8_mm_op(features: OpFeatures):
 
 @update_features(exir_ops.edge.et_vk.linear_weight_int4.default)
 def register_int4_mm_op(features: OpFeatures):
+    features.buffer_impl = True
     features.texture_impl = TextureImplFeatures(
         uses_axis_map=False,
         valid_packed_dims={PackedDim.WIDTH},
@@ -401,6 +402,7 @@ def register_int4_mm_op(features: OpFeatures):
     features.optimal_storage = VkStorageType.TEXTURE_3D
     features.optimal_layout = VkMemoryLayout.TENSOR_WIDTH_PACKED
     features.handles_own_prepacking = True
+    features.skip_limits_check = {1}
     return features

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 1 addition & 3 deletions
@@ -497,9 +497,7 @@ vTensor::vTensor(
   VK_CHECK_COND(
       dim_order_is_valid(dim_order_), "computed dim order is invalid");

-  if (storage_type != utils::kBuffer) {
-    set_logical_limits(storage_.image_extents_);
-  }
+  set_logical_limits(storage_.image_extents_);
 }

 // NOLINTNEXTLINE
