Merged

Commits (36)
814a251
Changed library from maxminddb to geoip2
fgibertoni Apr 19, 2024
5002680
Refactoring maxmind not finished
fgibertoni Apr 19, 2024
3557647
Refactoring maxmind analyzer
fgibertoni Apr 22, 2024
50375ef
Added methods for query db
fgibertoni Apr 22, 2024
a4eb906
Made a method 'private'
fgibertoni Apr 22, 2024
00d4c1a
Renamed method
fgibertoni Apr 22, 2024
60f396c
Made attributes 'private'
fgibertoni Apr 22, 2024
7a38689
Added return type
fgibertoni Apr 22, 2024
51d72db
Improved log message
fgibertoni Apr 22, 2024
fef2d4b
Renamed back to update() because of updating cron
fgibertoni Apr 22, 2024
a1f9114
Fixed media_root settings
fgibertoni Apr 22, 2024
dcb3928
Added log to tar extraction
fgibertoni Apr 22, 2024
8cb688d
Removed unnecessary variable
fgibertoni Apr 22, 2024
d45d1da
Improved log messages
fgibertoni Apr 22, 2024
3a55feb
Readded maxminddb library
fgibertoni Apr 23, 2024
e3f57ef
Update api_app/analyzers_manager/observable_analyzers/maxmind.py
fgibertoni Apr 23, 2024
6c71e33
Fix_ci (#2284)
0ssigeno Apr 23, 2024
4c9340e
Changed library from maxminddb to geoip2
fgibertoni Apr 19, 2024
e450fd0
Refactoring maxmind not finished
fgibertoni Apr 19, 2024
0e3d20a
Refactoring maxmind analyzer
fgibertoni Apr 22, 2024
32054e1
Added methods for query db
fgibertoni Apr 22, 2024
384b999
Made a method 'private'
fgibertoni Apr 22, 2024
a84344c
Renamed method
fgibertoni Apr 22, 2024
1f3fef5
Made attributes 'private'
fgibertoni Apr 22, 2024
ea44316
Added return type
fgibertoni Apr 22, 2024
61528e0
Improved log message
fgibertoni Apr 22, 2024
b7a4fcc
Renamed back to update() because of updating cron
fgibertoni Apr 22, 2024
247137b
Fixed media_root settings
fgibertoni Apr 22, 2024
c04ba9d
Added log to tar extraction
fgibertoni Apr 22, 2024
ff4621d
Removed unnecessary variable
fgibertoni Apr 22, 2024
3b8d10d
Improved log messages
fgibertoni Apr 22, 2024
8d9bbea
Readded maxminddb library
fgibertoni Apr 23, 2024
a888564
Merge branch 'asn-maxmind-integration' of github.com:intelowlproject/…
fgibertoni Apr 23, 2024
ce58906
Moved functions and improved logs
fgibertoni Apr 24, 2024
c9d8cbc
Changed error handling
fgibertoni Apr 26, 2024
07a3bf4
Fixed deepsource warning
fgibertoni Apr 26, 2024
237 changes: 142 additions & 95 deletions api_app/analyzers_manager/observable_analyzers/maxmind.py
@@ -6,69 +6,93 @@
import os
import shutil
import tarfile
from typing import Optional

import maxminddb
import requests
from django.conf import settings
from geoip2.database import Reader
from geoip2.errors import AddressNotFoundError, GeoIP2Error
from geoip2.models import ASN, City, Country

from api_app.analyzers_manager import classes
from api_app.analyzers_manager.exceptions import (
AnalyzerConfigurationException,
AnalyzerRunException,
)
from api_app.models import PluginConfig
from tests.mock_utils import if_mock_connections, patch

logger = logging.getLogger(__name__)

db_names = ["GeoLite2-Country.mmdb", "GeoLite2-City.mmdb"]

class MaxmindDBManager:
_supported_dbs: [str] = ["GeoLite2-Country", "GeoLite2-City", "GeoLite2-ASN"]
_default_db_extension: str = ".mmdb"

class Maxmind(classes.ObservableAnalyzer):
_api_key_name: str
@classmethod
def get_supported_dbs(cls) -> [str]:
return [db_name + cls._default_db_extension for db_name in cls._supported_dbs]

def run(self):
@classmethod
def _get_physical_location(cls, db: str) -> str:
return f"{settings.MEDIA_ROOT}/{db}{cls._default_db_extension}"

@classmethod
def update_all_dbs(cls, api_key: str) -> bool:
return all(cls._update_db(db, api_key) for db in cls._supported_dbs)

def query_all_dbs(self, observable_query: str, api_key: str) -> dict:
maxmind_final_result = {}
for db in db_names:
try:
db_location = _get_db_location(db)
if not os.path.isfile(db_location) and not self._update_db(
db, self._api_key_name
):
raise AnalyzerRunException(
f"failed extraction of maxmind db {db},"
" reached max number of attempts"
)
if not os.path.exists(db_location):
raise maxminddb.InvalidDatabaseError(
"database location does not exist"
)
reader = maxminddb.open_database(db_location)
maxmind_result = reader.get(self.observable_name)
reader.close()
except maxminddb.InvalidDatabaseError as e:
error_message = f"Invalid database error: {e}"
logger.exception(error_message)
maxmind_result = {"error": error_message}
logger.info(f"maxmind result: {maxmind_result}")
for db in self._supported_dbs:
maxmind_result = self._query_single_db(observable_query, db, api_key)

if maxmind_result:
logger.info(f"maxmind result: {maxmind_result}")
maxmind_final_result.update(maxmind_result)
else:
logger.warning("maxmind result not available")

return maxmind_final_result

@classmethod
def _get_api_key(cls) -> Optional[str]:
for plugin in PluginConfig.objects.filter(
parameter__python_module=cls.python_module,
parameter__is_secret=True,
parameter__name="api_key_name",
):
if plugin.value:
return plugin.value
return None
def _query_single_db(self, query_ip: str, db_name: str, api_key: str) -> dict:
result: ASN | City | Country
db_path: str = self._get_physical_location(db_name)
self._check_and_update_db(api_key, db_name)

logger.info(f"Query {db_name=} for {query_ip=}")
with Reader(db_path) as reader:
try:
if "ASN" in db_name:
result = reader.asn(query_ip)
elif "Country" in db_name:
result = reader.country(query_ip)
elif "City" in db_name:
result = reader.city(query_ip)
except AddressNotFoundError:
reader.close()
logger.info(
f"Query for observable '{query_ip}' "
f"didn't produce any results in any db."
)
return {}
except (GeoIP2Error, maxminddb.InvalidDatabaseError) as e:
error_message = f"GeoIP2 database error: {e}"
logger.exception(error_message)
return {"error": error_message}
Member

There is a specific framework mechanism for recording errors:

            self.report.errors.append(error_message)
            self.report.save()

Member

and you can always just return an empty result.

Contributor Author

The problem with this approach is that I'm inside MaxmindDBManager, which is not a subclass of ObservableAnalyzer.
I changed the code accordingly: query_all_dbs now returns two dicts, one with the results and one with the errors.
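A minimal sketch of how that two-dict result could be wired back into the analyzer, assuming query_all_dbs returns a (results, errors) pair as described above and using the report-error mechanism suggested by the reviewer; the names and shapes here are illustrative, not the merged implementation:

    def run(self):
        # hypothetical shape: the manager returns (results, errors) instead of a single dict
        results, errors = self._maxmind_db_manager.query_all_dbs(
            self.observable_name, self._api_key_name
        )
        for error_message in errors.values():
            # record failures on the job report via the framework mechanism
            self.report.errors.append(error_message)
        if errors:
            self.report.save()
        return results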

else:
reader.close()
return result.raw

def _check_and_update_db(self, api_key: str, db_name: str):
db_path = self._get_physical_location(db_name)
if not os.path.isfile(db_path) and not self._update_db(db_name, api_key):
raise AnalyzerRunException(
f"failed extraction of maxmind db {db_name},"
" reached max number of attempts"
)
if not os.path.exists(db_path):
raise maxminddb.InvalidDatabaseError(
f"database location '{db_path}' does not exist"
)

@classmethod
def _update_db(cls, db: str, api_key: str) -> bool:
@@ -77,78 +101,101 @@ def _update_db(cls, db: str, api_key: str) -> bool:
f"Unable to find api key for {cls.__name__}"
)

db_location = _get_db_location(db)
try:
db_name_wo_ext = db[:-5]
logger.info(f"starting download of db {db_name_wo_ext} from maxmind")
url = (
"https://download.maxmind.com/app/geoip_download?edition_id="
f"{db_name_wo_ext}&license_key={api_key}&suffix=tar.gz"
)
r = requests.get(url)
if r.status_code >= 300:
raise AnalyzerRunException(
f"failed request for new maxmind db {db_name_wo_ext}."
f" Status code: {r.status_code}"
)
logger.info(f"starting download of {db=} from maxmind")

tar_db_path = f"/tmp/{db_name_wo_ext}.tar.gz"
with open(tar_db_path, "wb") as f:
f.write(r.content) # lgtm [py/clear-text-storage-sensitive-data]

tf = tarfile.open(tar_db_path)
directory_to_extract_files = settings.MEDIA_ROOT
tf.extractall(str(directory_to_extract_files))

today = datetime.datetime.now().date()
counter = 0
directory_found = False
downloaded_db_path = ""
# this is because we do not know the exact date of the db we downloaded
while counter < 10 or not directory_found:
date_to_check = today - datetime.timedelta(days=counter)
formatted_date = date_to_check.strftime("%Y%m%d")
downloaded_db_path = (
f"{directory_to_extract_files}/"
f"{db_name_wo_ext}_{formatted_date}/{db}"
)
try:
os.rename(downloaded_db_path, db_location)
except FileNotFoundError:
logger.debug(
f"{downloaded_db_path} not found move to the day before"
)
counter += 1
else:
directory_found = True
shutil.rmtree(
f"{directory_to_extract_files}/"
f"{db_name_wo_ext}_{formatted_date}"
)

if directory_found:
logger.info(f"maxmind directory found {downloaded_db_path}")
else:
tar_db_path = cls._download_db(db, api_key)
cls._extract_db_to_media_root(tar_db_path)
directory_found = cls._remove_old_db(db)

if not directory_found:
return False

logger.info(f"ended download of db {db_name_wo_ext} from maxmind")
logger.info(f"ended download of {db=} from maxmind")
return True

except Exception as e:
logger.exception(e)
return False

@classmethod
def _remove_old_db(cls, db: str) -> bool:
physical_db_location = cls._get_physical_location(db)
today = datetime.datetime.now().date()
counter = 0
directory_found = False
# this is because we do not know the exact date of the db we downloaded
while counter < 10 or not directory_found:
formatted_date = (today - datetime.timedelta(days=counter)).strftime(
"%Y%m%d"
)
downloaded_db_path = (
f"{settings.MEDIA_ROOT}/"
f"{db}_{formatted_date}/{db}{cls._default_db_extension}"
)
try:
os.rename(downloaded_db_path, physical_db_location)
except FileNotFoundError:
logger.debug(f"{downloaded_db_path} not found move to the day before")
counter += 1
else:
directory_found = True
shutil.rmtree(f"{settings.MEDIA_ROOT}/" f"{db}_{formatted_date}")
logger.info(f"maxmind directory found {downloaded_db_path}")
return directory_found

@classmethod
def _extract_db_to_media_root(cls, tar_db_path: str):
logger.info(f"Started extracting {tar_db_path} to {settings.MEDIA_ROOT}.")
tf = tarfile.open(tar_db_path)
tf.extractall(str(settings.MEDIA_ROOT))
logger.info(f"Finished extracting {tar_db_path} to {settings.MEDIA_ROOT}.")

@classmethod
def _download_db(cls, db_name: str, api_key: str) -> str:
url = (
"https://download.maxmind.com/app/geoip_download?edition_id="
f"{db_name}&license_key={api_key}&suffix=tar.gz"
)
response = requests.get(url)
if response.status_code >= 300:
raise AnalyzerRunException(
f"failed request for new maxmind db {db_name}."
f" Status code: {response.status_code}"
f"\nResponse: {response.raw}"
)

return cls._write_db_to_filesystem(db_name, response.content)

@classmethod
def _write_db_to_filesystem(cls, db_name: str, content: bytes) -> str:
tar_db_path = f"/tmp/{db_name}.tar.gz"
logger.info(f"starting writing db {db_name} from maxmind to {tar_db_path}")
with open(tar_db_path, "wb") as f:
f.write(content)

return tar_db_path


class Maxmind(classes.ObservableAnalyzer):
_api_key_name: str
_maxmind_db_manager: "MaxmindDBManager" = MaxmindDBManager()

def run(self):
return self._maxmind_db_manager.query_all_dbs(
self.observable_name, self._api_key_name
)

@classmethod
def get_db_names(cls) -> [str]:
return cls._maxmind_db_manager.get_supported_dbs()

@classmethod
def update(cls) -> bool:
api_key = cls._get_api_key()
return all(cls._update_db(db, api_key) for db in db_names)
return cls._maxmind_db_manager.update_all_dbs(cls._api_key_name)

@classmethod
def _monkeypatch(cls):
# completely skip because does not work without connection.
patches = [if_mock_connections(patch.object(cls, "run", return_value={}))]
return super()._monkeypatch(patches=patches)


def _get_db_location(db):
return f"{settings.MEDIA_ROOT}/{db}"
1 change: 1 addition & 0 deletions requirements/project-requirements.txt
@@ -47,6 +47,7 @@ google-cloud-webrisk==1.14.0
intezer-sdk==1.20
lief==0.14.0
maxminddb==2.6.0
geoip2==4.8.0
mwdblib==4.5.0
git+https://github.com/decalage2/oletools.git@ccf99d1a8f85e552f5cc130fbaa504cfe5725a92
OTXv2==1.5.12
2 changes: 1 addition & 1 deletion tests/test_crons.py
@@ -71,7 +71,7 @@ def test_remove_old_jobs(self):
@if_mock_connections(skip("not working without connection"))
def test_maxmind_updater(self):
maxmind.Maxmind.update()
for db in maxmind.db_names:
for db in maxmind.Maxmind.get_db_names():
self.assertTrue(os.path.exists(db))

@if_mock_connections(