Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions api_app/ingestors_manager/ingestors/malshare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import logging
from typing import Any, Iterable
from unittest.mock import patch

import requests

from api_app.ingestors_manager.classes import Ingestor
from api_app.ingestors_manager.exceptions import IngestorRunException
from tests.mock_utils import MockUpResponse, if_mock_connections

logger = logging.getLogger(__name__)


class Malshare(Ingestor):
    """Ingestor that yields the raw bytes of samples listed by a MalShare API.

    ``run`` calls the ``getlist`` action, keeps at most ``limit`` entries of the
    returned list and downloads each one through the ``getfile`` action.
    """

    # Base URL of the MalShare instance (a trailing slash is tolerated).
    url: str
    # Sent as the ``api_key`` query parameter on every request.
    _api_key_name: str
    # Upper bound on the number of samples yielded by a single run.
    limit: int

    # Path of the API script. Kept in one place, as requested during review,
    # instead of repeating the "api.php" literal in every method; it is
    # combined with ``self.url`` when a request is made.
    endpoint: str = "api.php"
    # Seconds handed to ``requests.get`` so that a stalled server cannot block
    # the ingestor indefinitely.
    request_timeout: int = 30

    @classmethod
    def update(cls) -> bool:
        """No update step is implemented; the method returns ``None``."""
        pass

    def _api_url(self) -> str:
        """Join ``self.url`` and ``self.endpoint`` with exactly one slash."""
        return f"{self.url.rstrip('/')}/{self.endpoint}"

    def download_sample(self, h):
        """Download the sample identified by hash ``h`` and return its bytes.

        Raises:
            IngestorRunException: if the request fails, the server replies with
                an error status, or the response body is not ``bytes``.
        """
        try:
            logger.info(f"Downloading sample {h}")
            params = {
                "api_key": self._api_key_name,
                "action": "getfile",
                "hash": h,
            }
            response = requests.get(
                self._api_url(), params=params, timeout=self.request_timeout
            )
            response.raise_for_status()
            if not isinstance(response.content, bytes):
                raise ValueError("The downloaded file is not instance of bytes")
        except Exception as e:
            error_message = f"Cannot download the file {h}. Raised Error: {e}."
            # Chain the cause so the original traceback is not lost.
            raise IngestorRunException(error_message) from e
        return response.content

    def run(self) -> Iterable[Any]:
        """Yield the content of up to ``self.limit`` samples from ``getlist``.

        Raises:
            IngestorRunException: if the listing is not a JSON list, or if a
                sample cannot be downloaded.
        """
        params = {
            "api_key": self._api_key_name,
            "action": "getlist",
        }
        result = requests.get(
            self._api_url(), params=params, timeout=self.request_timeout
        )
        result.raise_for_status()
        content = result.json()
        if not isinstance(content, list):
            raise IngestorRunException(f"Content {content} not expected")

        # Slicing already stops at the end of the list, so no explicit
        # min(len(content), limit) is needed.
        for elem in content[: self.limit]:
            # download_sample logs the hash it is fetching.
            yield self.download_sample(elem.get("sha256"))

    @classmethod
    def _monkeypatch(cls):
        """Patch ``requests.get`` with canned responses for both API actions."""
        listing = [
            {
                "md5": "56cb253271d0bc47e2869d351ebd2551",
                "sha1": "8620e2d371740651fb2a111cbaf3ba1632b61b61",
                "sha256": "6cf10ac2e7b6bd7ff09e237322a89b1259da78bd54c20fe11339092fa921cf45",
            },
            {
                "md5": "56cb33e74796abcaa39783e8e873e351",
                "sha1": "0d72b496d104eb71ecb9d2107b99425e3eccf566",
                "sha256": "f85f9bd1a1cb68514876c2b13b8643715d551e055c7cb26f764a42abaac41067",
            },
            {
                "md5": "56cb78ab63ac800ef1e900a2ca855e90",
                "sha1": "cbbbf4c8608a0722a8490b352364a030211dfdbd",
                "sha256": "c26841fc297fadba690e4ae3be2f9f1fbef0766b46a828d7f12814dddcbd5478",
            },
        ]

        def _fake_get(url, *args, **kwargs):
            # The "action" query parameter decides which response is served:
            # the sample listing for "getlist", a fake file body otherwise.
            if "getlist" in kwargs.get("params", {}).get("action", ""):
                return MockUpResponse(listing, 200)
            return MockUpResponse(
                {},
                content=b"mock file content",
                status_code=200,
            )

        patches = [
            if_mock_connections(
                patch(
                    "requests.get",
                    side_effect=_fake_get,
                ),
            )
        ]
        return super()._monkeypatch(patches=patches)
244 changes: 244 additions & 0 deletions api_app/ingestors_manager/migrations/0029_ingestor_config_malshare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
from django.db import migrations
from django.db.models.fields.related_descriptors import (
ForwardManyToOneDescriptor,
ForwardOneToOneDescriptor,
ManyToManyDescriptor,
ReverseManyToOneDescriptor,
ReverseOneToOneDescriptor,
)

# Serialized IngestorConfig for the Malshare ingestor. `migrate` uses the
# "model" key to locate the Django model and creates the object from the
# remaining keys; nested dicts are resolved into related objects by
# `_get_real_obj`.
plugin = {
    # Python class implementing the ingestor, plus its own schedules.
    "python_module": {
        "health_check_schedule": None,
        "update_schedule": {
            "minute": "*",
            "hour": "*",
            "day_of_week": "*",
            "day_of_month": "*",
            "month_of_year": "*",
        },
        "module": "malshare.Malshare",
        "base_path": "api_app.ingestors_manager.ingestors",
    },
    # Crontab fields for the ingestor itself: minute 0 of every hour.
    "schedule": {
        "minute": "0",
        "hour": "*",
        "day_of_week": "*",
        "day_of_month": "*",
        "month_of_year": "*",
    },
    # Periodic task that triggers the ingestor; created disabled.
    "periodic_task": {
        "crontab": {
            "minute": "0",
            "hour": "*",
            "day_of_week": "*",
            "day_of_month": "*",
            "month_of_year": "*",
        },
        "name": "MalshareIngestor",
        "task": "intel_owl.tasks.execute_ingestor",
        "kwargs": '{"config_name": "Malshare"}',
        "queue": "default",
        "enabled": False,
    },
    # Robot user associated with the ingestor.
    "user": {
        "username": "MalshareIngestor",
        "profile": {
            "user": {
                "username": "MalshareIngestor",
                "email": "",
                "first_name": "",
                "last_name": "",
                "password": "",
                "is_active": True,
            },
            "company_name": "",
            "company_role": "",
            "twitter_handle": "",
            "discover_from": "other",
            "task_priority": 7,
            "is_robot": True,
        },
    },
    "playbooks_choice": ["FREE_TO_USE_ANALYZERS"],
    "name": "Malshare",
    "description": "[MalShare](https://malshare.com/) is a public malware repository that allows registered users to access and share a collection of malware samples for research and analysis purposes",
    # The ingestor ships disabled.
    "disabled": True,
    "soft_time_limit": 60,
    "routing_key": "ingestor",
    "health_check_status": True,
    "maximum_jobs": 10,
    "delay": "00:00:00",
    # "app_label.ModelName" path, split on "." and passed to apps.get_model.
    "model": "ingestors_manager.IngestorConfig",
}

# Parameter rows created by `migrate` for the Malshare python module:
# two optional, non-secret settings (url, limit) and one required secret
# (api_key_name).
params = [
    {
        "python_module": {
            "module": "malshare.Malshare",
            "base_path": "api_app.ingestors_manager.ingestors",
        },
        "name": "url",
        "type": "str",
        "description": "",
        "is_secret": False,
        "required": False,
    },
    {
        "python_module": {
            "module": "malshare.Malshare",
            "base_path": "api_app.ingestors_manager.ingestors",
        },
        "name": "limit",
        "type": "int",
        "description": "",
        "is_secret": False,
        "required": False,
    },
    {
        "python_module": {
            "module": "malshare.Malshare",
            "base_path": "api_app.ingestors_manager.ingestors",
        },
        # NOTE(review): presumably surfaced on the ingestor as `_api_key_name`
        # - confirm against the plugin framework.
        "name": "api_key_name",
        "type": "str",
        "description": "",
        "is_secret": True,
        "required": True,
    },
]

# Default PluginConfig rows created by `migrate`: one value per non-secret
# parameter, bound to the "Malshare" ingestor config and owned by nobody.
values = [
    {
        # Must describe the same Parameter as the matching entry in `params`.
        "parameter": {
            "python_module": {
                "module": "malshare.Malshare",
                "base_path": "api_app.ingestors_manager.ingestors",
            },
            "name": "url",
            "type": "str",
            "description": "",
            "is_secret": False,
            "required": False,
        },
        "analyzer_config": None,
        "connector_config": None,
        "visualizer_config": None,
        "ingestor_config": "Malshare",
        "pivot_config": None,
        "for_organization": False,
        # Default base URL of the service.
        "value": "https://malshare.com/",
        "updated_at": "2025-03-20T13:27:06.934775Z",
        "owner": None,
    },
    {
        "parameter": {
            "python_module": {
                "module": "malshare.Malshare",
                "base_path": "api_app.ingestors_manager.ingestors",
            },
            "name": "limit",
            "type": "int",
            "description": "",
            "is_secret": False,
            "required": False,
        },
        "analyzer_config": None,
        "connector_config": None,
        "visualizer_config": None,
        "ingestor_config": "Malshare",
        "pivot_config": None,
        "for_organization": False,
        # Default number of samples per run.
        "value": 10,
        "updated_at": "2025-03-20T13:18:41.183632Z",
        "owner": None,
    },
]


def _get_real_obj(Model, field, value):
    """Turn a serialized ``value`` into what ``Model.<field>`` should hold.

    Truthy values of single-valued relation fields are replaced by the related
    object, truthy values of many-to-many fields by a list of related objects.
    Everything else, including any falsy value, is returned unchanged.
    """

    def _get_obj(Model, other_model, value):
        # A dict describes the related row itself: resolve its own fields
        # recursively, then fetch or create that row.
        if isinstance(value, dict):
            resolved = {
                key: _get_real_obj(other_model, key, inner)
                for key, inner in value.items()
            }
            return other_model.objects.get_or_create(**resolved)[0]
        # Anything that is not an int is treated as the related row's name.
        if not isinstance(value, int):
            return other_model.objects.get(name=value)
        # An int is a serialized primary key, except on PluginConfig where
        # the lookup goes through this migration's plugin name instead.
        if Model.__name__ == "PluginConfig":
            return other_model.objects.get(name=plugin["name"])
        return other_model.objects.get(pk=value)

    descriptor = getattr(Model, field)
    if not value:
        return value

    descriptor_type = type(descriptor)
    single_valued = (
        ForwardManyToOneDescriptor,
        ReverseManyToOneDescriptor,
        ReverseOneToOneDescriptor,
        ForwardOneToOneDescriptor,
    )
    if descriptor_type in single_valued:
        related_model = descriptor.get_queryset().model
        return _get_obj(Model, related_model, value)
    if descriptor_type is ManyToManyDescriptor:
        related_model = descriptor.rel.model
        return [_get_obj(Model, related_model, item) for item in value]
    return value


def _create_object(Model, data):
    """Make sure a ``Model`` row described by ``data`` exists.

    Returns ``True`` when a row matching every non many-to-many field was
    already present, ``False`` when a new row had to be created.
    """
    m2m_values, plain_values = {}, {}
    for field, raw in data.items():
        resolved = _get_real_obj(Model, field, raw)
        is_m2m = type(getattr(Model, field)) is ManyToManyDescriptor
        bucket = m2m_values if is_m2m else plain_values
        bucket[field] = resolved

    try:
        Model.objects.get(**plain_values)
    except Model.DoesNotExist:
        pass
    else:
        return True

    # Nothing matched: validate and store a new row, then attach the
    # many-to-many values, which can only be set on a saved object.
    instance = Model(**plain_values)
    instance.full_clean()
    instance.save()
    for field, related in m2m_values.items():
        manager = getattr(instance, field)
        if related is not None:
            manager.set(related)
    return False


def migrate(apps, schema_editor):
    """Create the Malshare ingestor config with its parameters and defaults.

    Nothing is created when a config with the same name already exists.
    Parameters and default values are only added when the config itself was
    newly created by this call.
    """
    Parameter = apps.get_model("api_app", "Parameter")
    PluginConfig = apps.get_model("api_app", "PluginConfig")
    # Pop "model" from a copy: removing it from the module-level dict would
    # make any later call in the same process fail with KeyError.
    plugin_data = dict(plugin)
    python_path = plugin_data.pop("model")
    Model = apps.get_model(*python_path.split("."))
    if not Model.objects.filter(name=plugin_data["name"]).exists():
        exists = _create_object(Model, plugin_data)
        if not exists:
            for param in params:
                _create_object(Parameter, param)
            for value in values:
                _create_object(PluginConfig, value)


def reverse_migrate(apps, schema_editor):
    """Delete the ingestor config that ``migrate`` creates.

    Raises the model's ``DoesNotExist`` when no config with that name exists.
    """
    # Read the key without removing it, so the module-level dict stays intact
    # for any other call made in the same process.
    python_path = plugin["model"]
    Model = apps.get_model(*python_path.split("."))
    Model.objects.get(name=plugin["name"]).delete()


class Migration(migrations.Migration):
    """Data migration that adds (and on reverse removes) the Malshare ingestor."""

    # The migration opts out of atomic execution.
    atomic = False

    dependencies = [
        ("api_app", "0071_delete_last_elastic_report"),
        ("ingestors_manager", "0028_ingestor_config_greedybear"),
    ]

    operations = [
        migrations.RunPython(migrate, reverse_migrate),
    ]
Loading