[Analyzer] Debloat #2806
New file: the Debloat file analyzer (@@ -0,0 +1,137 @@)

```python
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
# See the file 'LICENSE' for copying permission.

import hashlib
import logging
import os
from base64 import b64encode
from tempfile import TemporaryDirectory

import pefile
from debloat.processor import process_pe

from api_app.analyzers_manager.classes import FileAnalyzer
from api_app.analyzers_manager.exceptions import AnalyzerRunException
from tests.mock_utils import MockUpResponse, if_mock_connections, patch

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)


class Debloat(FileAnalyzer):
    def run(self):
        try:
            binary = pefile.PE(self.filepath, fast_load=True)
        except pefile.PEFormatError as e:
            raise AnalyzerRunException(f"Invalid PE file: {e}")

        # The debloat library invokes this callback with print-style kwargs
        # such as "end" and "flush", which logging.Logger.info does not
        # accept, so wrap the logger and forward only supported keyword args.
        def log_message(*args, end="\n", **kwargs):
            message = " ".join(map(str, args))
            if end:
                message += end
            # Drop anything logging.Logger.info cannot take as a kwarg.
            valid_kwargs = {
                key: value
                for key, value in kwargs.items()
                if key in ("exc_info", "stack_info", "stacklevel", "extra")
            }
            logger.info(message, **valid_kwargs)

        with TemporaryDirectory() as temp_dir:
            output_path = os.path.join(temp_dir, "debloated.exe")
            original_size = os.path.getsize(self.filepath)

            try:
                debloat_code = process_pe(
                    binary,
                    out_path=output_path,
                    last_ditch_processing=True,
                    cert_preservation=True,
                    log_message=log_message,
                    beginning_file_size=original_size,
                )
            except OSError as e:
                raise AnalyzerRunException(
                    f"File operation failed during Debloat processing: {e}"
                )
            except ValueError as e:
                raise AnalyzerRunException(
                    f"Invalid parameter in Debloat processing: {e}"
                )
            except AttributeError as e:
                raise AnalyzerRunException(
                    f"Debloat library error, possibly malformed PE object: {e}"
                )

            if debloat_code == 0 and not os.path.exists(output_path):
                return {
                    "success": False,
                    "error": "No solution found",
                }

            if not os.path.exists(output_path) or not os.path.isfile(output_path):
                raise AnalyzerRunException(
                    "Debloat did not produce a valid output file"
                )

            debloated_size = os.path.getsize(output_path)
            size_reduction = (
                (original_size - debloated_size) / original_size * 100
                if original_size > 0
                else 0
            )

            with open(output_path, "rb") as f:
                output = f.read()
            debloated_hash = hashlib.md5(output).hexdigest()
            debloated_sha256 = hashlib.sha256(output).hexdigest()

            encoded_output = b64encode(output).decode("utf-8")

            os.remove(output_path)
            logger.info("Cleaned up temporary file.")

            return {
                "success": True,
                "original_size": original_size,
                "debloated_size": debloated_size,
                "debloated_file": encoded_output,
                "size_reduction_percentage": size_reduction,
                "debloated_hash": debloated_hash,
                "debloated_sha256": debloated_sha256,
            }

    @classmethod
    def update(cls) -> bool:
        pass

    @classmethod
    def _monkeypatch(cls, patches: list = None):
        patches = [
            if_mock_connections(
                patch(
                    "debloat.processor.process_pe",
                    return_value=MockUpResponse(
                        {
                            "success": True,
                            "original_size": 3840392,
                            "debloated_file": "TVqQAAMAAAAEAAAA//",
                            "debloated_hash": "f7f92eadfb444e7fce27efa2007a955a",
                            "debloated_size": 813976,
                            "size_reduction_percentage": 78.80487200264973,
                            "debloated_sha256": "f7f92eadfb444e7fce27efa2007a955a",
                        },
                        200,
                    ),
                )
            ),
        ]
        return super()._monkeypatch(patches)
```
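
The report returned by `run()` ships the debloated binary base64-encoded in `debloated_file`, alongside its MD5 and SHA256. As a quick illustration only (not part of this PR; the helper name `restore_debloated_file` is made up), a consumer of the report could recover and verify the payload like this:

```python
# Hypothetical consumer-side helper (not from the PR): decode the base64
# "debloated_file" field and verify it against the reported SHA256.
import hashlib
from base64 import b64decode


def restore_debloated_file(report: dict, out_path: str = "debloated.exe") -> bool:
    data = b64decode(report["debloated_file"])
    with open(out_path, "wb") as f:
        f.write(data)
    # True only if the decoded bytes match the hash computed by the analyzer.
    return hashlib.sha256(data).hexdigest() == report["debloated_sha256"]
```
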
New file: analyzers_manager migration registering the Debloat AnalyzerConfig (@@ -0,0 +1,125 @@)

```python
from django.db import migrations
from django.db.models.fields.related_descriptors import (
    ForwardManyToOneDescriptor,
    ForwardOneToOneDescriptor,
    ManyToManyDescriptor,
    ReverseManyToOneDescriptor,
    ReverseOneToOneDescriptor,
)

plugin = {
    "python_module": {
        "health_check_schedule": None,
        "update_schedule": None,
        "module": "debloat.Debloat",
        "base_path": "api_app.analyzers_manager.file_analyzers",
    },
    "name": "Debloat",
    "description": '"Analyzer for debloating PE files using the [Debloat](https://github.com/Squiblydoo/debloat) tool. Reduces file size for easier malware analysis."',
    "disabled": False,
    "soft_time_limit": 300,
    "routing_key": "default",
    "health_check_status": True,
    "type": "file",
    "docker_based": False,
    "maximum_tlp": "RED",
    "observable_supported": [],
    "supported_filetypes": ["application/vnd.microsoft.portable-executable"],
    "run_hash": False,
    "run_hash_type": "",
    "not_supported_filetypes": [],
    "mapping_data_model": {},
    "model": "analyzers_manager.AnalyzerConfig",
}

params = []

values = []


def _get_real_obj(Model, field, value):
    def _get_obj(Model, other_model, value):
        if isinstance(value, dict):
            real_vals = {}
            for key, real_val in value.items():
                real_vals[key] = _get_real_obj(other_model, key, real_val)
            value = other_model.objects.get_or_create(**real_vals)[0]
        # it is just the primary key serialized
        else:
            if isinstance(value, int):
                if Model.__name__ == "PluginConfig":
                    value = other_model.objects.get(name=plugin["name"])
                else:
                    value = other_model.objects.get(pk=value)
            else:
                value = other_model.objects.get(name=value)
        return value

    if (
        type(getattr(Model, field))
        in [
            ForwardManyToOneDescriptor,
            ReverseManyToOneDescriptor,
            ReverseOneToOneDescriptor,
            ForwardOneToOneDescriptor,
        ]
        and value
    ):
        other_model = getattr(Model, field).get_queryset().model
        value = _get_obj(Model, other_model, value)
    elif type(getattr(Model, field)) in [ManyToManyDescriptor] and value:
        other_model = getattr(Model, field).rel.model
        value = [_get_obj(Model, other_model, val) for val in value]
    return value


def _create_object(Model, data):
    mtm, no_mtm = {}, {}
    for field, value in data.items():
        value = _get_real_obj(Model, field, value)
        if type(getattr(Model, field)) is ManyToManyDescriptor:
            mtm[field] = value
        else:
            no_mtm[field] = value
    try:
        o = Model.objects.get(**no_mtm)
    except Model.DoesNotExist:
        o = Model(**no_mtm)
        o.full_clean()
        o.save()
        for field, value in mtm.items():
            attribute = getattr(o, field)
            if value is not None:
                attribute.set(value)
        return False
    return True


def migrate(apps, schema_editor):
    Parameter = apps.get_model("api_app", "Parameter")
    PluginConfig = apps.get_model("api_app", "PluginConfig")
    python_path = plugin.pop("model")
    Model = apps.get_model(*python_path.split("."))
    if not Model.objects.filter(name=plugin["name"]).exists():
        exists = _create_object(Model, plugin)
        if not exists:
            for param in params:
                _create_object(Parameter, param)
            for value in values:
                _create_object(PluginConfig, value)


def reverse_migrate(apps, schema_editor):
    python_path = plugin.pop("model")
    Model = apps.get_model(*python_path.split("."))
    Model.objects.get(name=plugin["name"]).delete()


class Migration(migrations.Migration):
    atomic = False
    dependencies = [
        ("api_app", "0071_delete_last_elastic_report"),
        ("analyzers_manager", "0154_analyzer_config_bbot"),
    ]

    operations = [migrations.RunPython(migrate, reverse_migrate)]
```
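
Once this migration has run, the new config row can be sanity-checked from a Django shell. A minimal sketch, assuming the concrete model is importable as `api_app.analyzers_manager.models.AnalyzerConfig` (the migration itself only references it through `apps.get_model`):

```python
# Quick post-migration check (sketch; the import path is inferred from the
# "analyzers_manager.AnalyzerConfig" label used in the plugin dict above).
from api_app.analyzers_manager.models import AnalyzerConfig

config = AnalyzerConfig.objects.get(name="Debloat")
assert config.type == "file"
assert config.supported_filetypes == [
    "application/vnd.microsoft.portable-executable"
]
print(config.maximum_tlp, config.soft_time_limit)  # expected: RED 300
```
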
Modified file: Docker test compose override (@@ -1,43 +1,43 @@)

```yaml
services:
  uwsgi:
    build:
      context: ..
      dockerfile: docker/Dockerfile
      args:
        REPO_DOWNLOADER_ENABLED: ${REPO_DOWNLOADER_ENABLED}
        WATCHMAN: "true"
        PYCTI_VERSION: ${PYCTI_VERSION:-5.10.0}
    image: intelowlproject/intelowl:test
    volumes:
      - ../:/opt/deploy/intel_owl
    environment:
      - DEBUG=True
      - DJANGO_TEST_SERVER=True
      - DJANGO_WATCHMAN_TIMEOUT=60

  daphne:
    image: intelowlproject/intelowl:test
    volumes:
      - ../:/opt/deploy/intel_owl

  nginx:
    build:
      context: ..
      dockerfile: docker/Dockerfile_nginx
    image: intelowlproject/intelowl_nginx:test
    volumes:
      - ../configuration/nginx/django_server.conf:/etc/nginx/conf.d/default.conf

  celery_beat:
    image: intelowlproject/intelowl:test
    volumes:
      - ../:/opt/deploy/intel_owl
    environment:
      - DEBUG=True

  celery_worker_default:
    image: intelowlproject/intelowl:test
    volumes:
      - ../:/opt/deploy/intel_owl
    environment:
      - DEBUG=True
```
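
For context on how these environment entries are typically consumed, the sketch below shows one common way Django settings read such flags; it is illustrative only, and IntelOwl's actual settings code may handle them differently.

```python
# Sketch of reading the override's environment flags in a Django settings
# module (assumed pattern, not code from this PR).
import os

DEBUG = os.environ.get("DEBUG", "False") == "True"
DJANGO_TEST_SERVER = os.environ.get("DJANGO_TEST_SERVER", "False") == "True"
DJANGO_WATCHMAN_TIMEOUT = int(os.environ.get("DJANGO_WATCHMAN_TIMEOUT", "5"))
```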