Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ exclude =
data,
.github

max-line-length = 120
max-line-length = 150

per-file-ignores =
# imported but unused
Expand Down
3 changes: 3 additions & 0 deletions conflowgen/api/export_container_flow_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ def export(
) -> str:
"""
This extracts the container movement data from the SQL database to a folder of choice in a tabular data format.
In addition, you find a file called `metadata.yaml`. It contains an explanation for each column of each CSV file
as well as some general metadata, such as the ConFlowGen version used and the date and time of the scenario
generation.

Args:
folder_name: Name of folder that bundles the tabular data which belongs together
Expand Down
81 changes: 71 additions & 10 deletions conflowgen/application/services/export_container_flow_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

import numpy as np
import pandas as pd
import peewee
import yaml
# noinspection PyProtectedMember
from peewee import ModelSelect

Expand All @@ -22,6 +24,7 @@
from conflowgen.domain_models.large_vehicle_schedule import Destination
from conflowgen.domain_models.vehicle import DeepSeaVessel, LargeScheduledVehicle, Feeder, Barge, Train, Truck, \
AbstractLargeScheduledVehicle
from conflowgen.application.models.container_flow_generation_properties import ContainerFlowGenerationProperties

EXPORTS_DEFAULT_DIR = os.path.join(
os.path.dirname(os.path.realpath(__file__)),
Expand Down Expand Up @@ -129,6 +132,13 @@ def _save_as_xlsx(cls, df: pd.DataFrame, file_name: str):
},
}

large_schedule_vehicles_as_subtype = {
"deep_sea_vessels": DeepSeaVessel,
"feeders": Feeder,
"barges": Barge,
"trains": Train,
}

def __init__(self):
self.save_as_file_format_mapping = {
ExportFileFormat.csv: self._save_as_csv,
Expand Down Expand Up @@ -235,23 +245,60 @@ def _convert_sql_database_to_pandas_dataframe(cls) -> Dict[str, pd.DataFrame]:
"containers": df_container,
}

large_schedule_vehicles_as_subtype = {
"deep_sea_vessels": DeepSeaVessel,
"feeders": Feeder,
"barges": Barge,
"trains": Train,
}
for file_name, large_schedule_vehicle_as_subtype in large_schedule_vehicles_as_subtype.items():
cls.logger.debug(f"Gathering data for generating the '{file_name}' table...")
for vehicle_type_name, large_schedule_vehicle_as_subtype in cls.large_schedule_vehicles_as_subtype.items():
cls.logger.debug(f"Gathering data for generating the '{vehicle_type_name}' table...")
df = cls._convert_table_to_pandas_dataframe(large_schedule_vehicle_as_subtype)
if len(df) == 0:
cls.logger.info(f"No content found for the {file_name} table, the file will be empty.")
result[file_name] = df
cls.logger.info(f"No content found for the {vehicle_type_name} table, the file will be empty.")
result[vehicle_type_name] = df

df_trucks = cls._convert_table_to_pandas_dataframe(Truck)
result["trucks"] = df_trucks
return result

@classmethod
def _get_metadata_of_model(
        cls, model: type[peewee.Model], metadata: Optional[dict] = None, single: bool = False, resolve: bool = True,
) -> Dict:
    """
    Collect the column metadata (the peewee ``help_text`` of each field) of the given model.

    Args:
        model: The peewee model to document.
        metadata: An existing dictionary to merge into (used when resolving foreign keys recursively).
        single: If True, the table is expected to hold a single row and the stored value is
            reported next to the explanation.
        resolve: If True, foreign keys are followed and the referenced model's metadata is merged in.

    Returns:
        A dictionary mapping the exported column name to its explanation (or, for ``single=True``,
        to a dict with an ``Explanation`` and a ``Value`` entry).
    """
    if metadata is None:
        metadata = {}

    # Fetch the single row once instead of once per field; it may be None if the table is empty.
    instance = model.get_or_none() if single else None

    for field in model._meta.sorted_fields:  # pylint: disable=protected-access
        if not field.help_text:  # if there is no help text, we have no metadata to add
            continue

        # Columns that are dropped during the export do not appear in the CSV files,
        # so they must not show up in the metadata either.
        if field.name in cls.columns_to_drop.get(model, ()):
            continue

        # If the export renames this column, report it under the exported name.
        field_name = cls.columns_to_rename.get(model, {}).get(field.name, field.name)

        if isinstance(field, peewee.ForeignKeyField) and resolve:
            # Nested model: merge the referenced model's metadata into the same dictionary.
            cls._get_metadata_of_model(field.rel_model, metadata)
        elif single:
            # Single-row table: the stored value can be spelled out alongside the explanation.
            # Guard against an empty table instead of crashing on getattr(None, ...).
            metadata[field_name] = {
                "Explanation": field.help_text,
                "Value": getattr(instance, field.name) if instance is not None else None,
            }
        else:
            # Default case: several entries per table, only the explanation is reported.
            metadata[field_name] = field.help_text

    return metadata

@classmethod
def _get_metadata(cls) -> Dict[str, dict]:
    """
    Assemble the metadata that is dumped into ``metadata.yaml``: one entry with the general
    scenario properties, one for the container table, and one per scheduled vehicle type.
    """
    metadata: Dict[str, dict] = {
        "general": cls._get_metadata_of_model(ContainerFlowGenerationProperties, single=True),
        "container": cls._get_metadata_of_model(Container, resolve=False),
    }
    metadata.update(
        (vehicle_name, cls._get_metadata_of_model(vehicle_model))
        for vehicle_name, vehicle_model in cls.large_schedule_vehicles_as_subtype.items()
    )
    return metadata

def export(
self,
folder_name: str,
Expand Down Expand Up @@ -294,5 +341,19 @@ def export(
self.logger.debug(f"Saving file {full_file_name}")
# noinspection PyArgumentList
self.save_as_file_format_mapping[file_format](df, path_to_file)

self._save_metadata(path_to_target_folder)
self.logger.debug("Saving file metadata.yaml")

self.logger.info("Export has finished successfully.")
return path_to_target_folder

@classmethod
def _save_metadata(cls, path_to_target_folder: str):
    """
    Write the column metadata as ``metadata.yaml`` into the given target folder.

    Args:
        path_to_target_folder: The folder that already contains the exported tabular data.
    """
    # Collect the metadata *before* opening the file so that a failure while gathering it
    # does not leave a truncated or empty metadata.yaml behind.
    metadata = cls._get_metadata()
    path_to_metadata_file = os.path.join(
        path_to_target_folder,
        "metadata.yaml"
    )
    with open(path_to_metadata_file, "w", encoding="utf-8") as file_handle:
        yaml.dump(metadata, file_handle)
2 changes: 2 additions & 0 deletions conflowgen/domain_models/vehicle.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ class LargeScheduledVehicle(BaseModel):
vehicle_name = CharField(
null=False,
default=lambda: "no-name-" + str(uuid.uuid4()),
help_text="The name of the vehicle. This might help the user of the data to track each vehicle, so it is "
"preferably unique."
)
capacity_in_teu = IntegerField(
null=False,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
import numpy as np
import pandas as pd
from peewee import IntegerField, Model, SqliteDatabase
import yaml

from conflowgen.application.data_types.export_file_format import ExportFileFormat
from conflowgen.application.models.container_flow_generation_properties import ContainerFlowGenerationProperties
from conflowgen.application.services.export_container_flow_service import (
CastingException,
ExportContainerFlowService,
Expand All @@ -33,8 +35,6 @@
TruckArrivalInformationForPickup,
)

# pylint: disable=protected-access, unused-argument, redundant-unittest-assert


class DummyModel:
"""Dummy model that mocks a Peewee ORM model."""
Expand Down Expand Up @@ -74,6 +74,7 @@ def setUpClass(cls):
Feeder,
Barge,
DeepSeaVessel,
ContainerFlowGenerationProperties,
]
# type: ignore[attr-defined]
cls._orig_model_dbs = {m: getattr(m, "_meta").database for m in cls._all_models}
Expand Down Expand Up @@ -111,24 +112,53 @@ def test_save_as_csv_xls_xlsx(self):
df = mock.Mock()

# CSV
ExportContainerFlowService._save_as_csv(df, "file.csv")
ExportContainerFlowService._save_as_csv(df, "file.csv") # pylint: disable=protected-access
df.to_csv.assert_called_once_with("file.csv")
with self.assertRaises(AssertionError):
ExportContainerFlowService._save_as_csv(df, "bad.txt")
ExportContainerFlowService._save_as_csv(df, "bad.txt") # pylint: disable=protected-access

# XLS
ExportContainerFlowService._save_as_xls(df, "file.xls")
ExportContainerFlowService._save_as_xls(df, "file.xls") # pylint: disable=protected-access
df.to_excel.assert_called_with("file.xls")
with self.assertRaises(AssertionError):
ExportContainerFlowService._save_as_xls(df, "wrong.xlsx")
ExportContainerFlowService._save_as_xls(df, "wrong.xlsx") # pylint: disable=protected-access

# XLSX
ExportContainerFlowService._save_as_xlsx(df, "file.xlsx")
ExportContainerFlowService._save_as_xlsx(df, "file.xlsx") # pylint: disable=protected-access
df.to_excel.assert_called_with("file.xlsx")
with self.assertRaises(AssertionError):
ExportContainerFlowService._save_as_xlsx(df, "wrong.xls")
ExportContainerFlowService._save_as_xlsx(df, "wrong.xls") # pylint: disable=protected-access

def test_get_metadata(self):
    """The Container model reports at least the storage requirement column in its metadata."""
    metadata_of_container = ExportContainerFlowService._get_metadata_of_model(  # pylint: disable=protected-access
        Container
    )
    self.assertIn("storage_requirement", metadata_of_container.keys())

def test_get_metadata_single(self):
    """With ``single=True``, each column reports both its explanation and its stored value."""
    expected_start_date = datetime.date(2025, 12, 8)
    properties = ContainerFlowGenerationProperties()
    properties.start_date = expected_start_date
    properties.save()

    metadata = ExportContainerFlowService._get_metadata_of_model(  # pylint: disable=protected-access
        ContainerFlowGenerationProperties, single=True
    )

    self.assertIn("start_date", metadata.keys())
    start_date_entry = metadata["start_date"]
    self.assertIn("Explanation", start_date_entry.keys())
    self.assertIn("Value", start_date_entry.keys())
    self.assertEqual(start_date_entry["Value"], expected_start_date)
    self.assertEqual(
        start_date_entry["Explanation"],
        "The first day of the generated container flow"
    )

def test_save_metadata(self):
    """``_save_metadata`` opens ``metadata.yaml`` inside the requested target folder."""
    cfgp = ContainerFlowGenerationProperties()
    start_date = datetime.date(2025, 12, 8)
    cfgp.start_date = start_date
    cfgp.save()
    with (mock.patch.object(yaml, "dump"),
          mock.patch("builtins.open") as mock_file):
        ExportContainerFlowService._save_metadata("my/funny/path/")  # pylint: disable=protected-access
        mock_file.assert_called_once_with("my/funny/path/metadata.yaml", "w", encoding='utf-8')

# Conversion helpers

def test_convert_table_to_pandas_dataframe_exceptions(self):
"""
Expand All @@ -144,7 +174,7 @@ def test_convert_table_to_pandas_dataframe_exceptions(self):

with mock.patch.object(pd.DataFrame, "drop", return_value=pd.DataFrame(fake_rows)):
with self.assertRaises(RuntimeError):
ExportContainerFlowService._convert_table_to_pandas_dataframe(fake_select)
ExportContainerFlowService._convert_table_to_pandas_dataframe(fake_select) # pylint: disable=protected-access

fake_rows = [{"id": 1, "f": np.float64(2.0)}]
fake_select.dicts.return_value = fake_rows
Expand All @@ -169,7 +199,7 @@ def test_convert_table_to_pandas_dataframe_exceptions(self):

with mock.patch.object(pd, "DataFrame", return_value=fake_df):
with self.assertRaises(CastingException):
ExportContainerFlowService._convert_table_to_pandas_dataframe(fake_select)
ExportContainerFlowService._convert_table_to_pandas_dataframe(fake_select) # pylint: disable=protected-access

def test_convert_sql_database_to_pandas_dataframe(self):
"""Covers lines 234–253."""
Expand All @@ -188,7 +218,7 @@ def side_effect(model, resolved_column=None):
),
mock.patch.object(ExportContainerFlowService, "logger") as log,
):
result = ExportContainerFlowService._convert_sql_database_to_pandas_dataframe()
result = ExportContainerFlowService._convert_sql_database_to_pandas_dataframe() # pylint: disable=protected-access

self.assertIn("containers", result)
self.assertIn("trucks", result)
Expand All @@ -200,7 +230,7 @@ def side_effect(model, resolved_column=None):

def test_export_creates_folder_and_saves_csv(self):
"""Covers 264 and 267–268."""
svc = ExportContainerFlowService()
ecfs = ExportContainerFlowService()
fake_dfs = {
"containers": pd.DataFrame([{"id": 1}]).set_index("id"),
"trucks": pd.DataFrame([{"id": 2}]).set_index("id"),
Expand All @@ -216,8 +246,13 @@ def test_export_creates_folder_and_saves_csv(self):
return_value=fake_dfs,
),
mock.patch.object(pd.DataFrame, "to_csv") as to_csv,
mock.patch.object(
ExportContainerFlowService,
"_save_metadata",
return_value=None,
)
):
out = svc.export("run1", None, ExportFileFormat.csv, overwrite=False)
out = ecfs.export("run1", None, ExportFileFormat.csv, overwrite=False)

makedirs.assert_called_once_with(EXPORTS_DEFAULT_DIR, exist_ok=True)
mkdir.assert_called_once()
Expand All @@ -226,7 +261,7 @@ def test_export_creates_folder_and_saves_csv(self):

def test_export_existing_folder_overwrite_behavior(self):
"""Covers lines 278 and 280 for overwrite True/False."""
svc = ExportContainerFlowService()
ecfs = ExportContainerFlowService()
fake_dfs = {"containers": pd.DataFrame([{"id": 1}]).set_index("id")}

with (
Expand All @@ -238,11 +273,16 @@ def test_export_existing_folder_overwrite_behavior(self):
),
mock.patch.object(pd.DataFrame, "to_csv") as to_csv,
mock.patch.object(ExportContainerFlowService, "logger"),
mock.patch.object(
ExportContainerFlowService,
"_save_metadata",
return_value=None,
)
):
with self.assertRaises(ExportOnlyAllowedToNotExistingFolderException):
svc.export("exists", "X", ExportFileFormat.csv, overwrite=False)
ecfs.export("exists", "X", ExportFileFormat.csv, overwrite=False)

out = svc.export("exists", "X", ExportFileFormat.csv, overwrite=True)
out = ecfs.export("exists", "X", ExportFileFormat.csv, overwrite=True)
to_csv.assert_called_once()
self.assertTrue(out.endswith(os.path.join("X", "exists")))

Expand Down Expand Up @@ -271,7 +311,7 @@ def test_convert_table_to_pandas_dataframe_resolved_column(self):
return_value=True,
),
):
ExportContainerFlowService._convert_table_to_pandas_dataframe(
ExportContainerFlowService._convert_table_to_pandas_dataframe( # pylint: disable=protected-access
fake_select, resolved_column="col_x"
)

Expand Down Expand Up @@ -307,7 +347,7 @@ class Meta:
svc.foreign_keys_to_resolve = {Child: {"parent_id": Parent}}

with mock.patch.object(svc, "debug_once") as dbg:
ExportContainerFlowService._convert_table_to_pandas_dataframe(Child)
ExportContainerFlowService._convert_table_to_pandas_dataframe(Child) # pylint: disable=protected-access

dbg.assert_called_with(mock.ANY)
except TypeError:
Expand All @@ -318,7 +358,7 @@ class Meta:
db.close()

def test_none_foreign_key(self):
"""Light weight integration test hitting line 165."""
"""Lightweight integration test hitting line 165."""
db = SqliteDatabase(":memory:")
try:
database_proxy.initialize(db)
Expand Down Expand Up @@ -350,7 +390,7 @@ def test_none_foreign_key(self):
destination=None,
)

ExportContainerFlowService._convert_table_to_pandas_dataframe(Container)
ExportContainerFlowService._convert_table_to_pandas_dataframe(Container) # pylint: disable=protected-access
except TypeError:
pass
finally:
Expand All @@ -377,7 +417,7 @@ def raise_keyerror(*_, **__):
}

with self.assertRaises(RuntimeError):
ExportContainerFlowService._convert_table_to_pandas_dataframe(Container)
ExportContainerFlowService._convert_table_to_pandas_dataframe(Container) # pylint: disable=protected-access
finally:
pd.DataFrame.drop = original_drop
ExportContainerFlowService.columns_to_drop = original_columns
Expand All @@ -396,7 +436,7 @@ def test_columns_to_drop(self):
return_value=df_mock,
),
):
out = svc._convert_table_to_pandas_dataframe(DummyModel)
out = svc._convert_table_to_pandas_dataframe(DummyModel) # pylint: disable=protected-access
self.assertIsInstance(out, pd.DataFrame)
except TypeError:
pass
Expand All @@ -422,6 +462,6 @@ def rename_spy(*args, **kwargs):
return_value=df_mock,
),
):
svc._convert_table_to_pandas_dataframe(DummyModel)
svc._convert_table_to_pandas_dataframe(DummyModel) # pylint: disable=protected-access

self.assertTrue(rename_called["hit"])
6 changes: 5 additions & 1 deletion docs/notebooks/first_steps.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,11 @@
"id": "0949540f-f1c4-4a8e-a86c-21fb1dddcd16",
"metadata": {},
"source": [
"Corresponding CSV files exist for the other vehicles as well."
"Corresponding CSV files exist for the other vehicles as well.\n",
"\n",
"In the same folder, you also find a file called `metadata.yaml`.\n",
"It contains an explanation for each column of each CSV file.\n",
"In addition, it includes some general metadata, such as the ConFlowGen version used and the date and time of the scenario generation."
]
}
],
Expand Down
Loading
Loading