diff --git a/.markdownlint.json b/.markdownlint.json new file mode 100644 index 0000000..c9cea6f --- /dev/null +++ b/.markdownlint.json @@ -0,0 +1,4 @@ +{ + "MD033": { "allowed_elements": ["div"] }, + "MD013": { "line_length": 120 } +} diff --git a/docs/analysis_modules.md b/docs/analysis_modules.md index 39bee43..b1cf84d 100644 --- a/docs/analysis_modules.md +++ b/docs/analysis_modules.md @@ -11,7 +11,7 @@ social:
-![Image title](../assets/images/analysis_modules/waterfall.svg){ align=right loading=lazy width="50%"} +![Image title](assets/images/analysis_modules/waterfall.svg){ align=right loading=lazy width="50%"} Waterfall plots are particularly good for showing how different things add or subtract from a starting number. For instance, @@ -42,3 +42,53 @@ waterfall_plot( rot=0, ) ``` + +### Timeline Plot + +
+ +![Timeline plot of monthly sales by customer group](assets/images/analysis_modules/time_plot.svg){ align=right loading=lazy width="50%"} + +Timeline plots are a fundamental tool for interpreting transactional data within a temporal context. By presenting data +in a chronological sequence, these visualizations reveal patterns and trends that might otherwise remain hidden in raw +numbers, making them essential for both historical analysis and forward-looking insights. They are particularly useful +for: + +- Tracking sales performance across different periods (e.g., daily, weekly, monthly) +- Identifying seasonal patterns or promotional impacts on sales +- Comparing the performance of different product categories or store locations over time +- Visualizing customer behavior trends, such as purchase frequency or average transaction value + +
+ +Example: + +```python +import numpy as np +import pandas as pd + +from pyretailscience.standard_graphs import time_plot + +# Create a sample DataFrame with 3 groups +rng = np.random.default_rng(42) +df = pd.DataFrame( + { + "transaction_datetime": pd.concat([pd.Series(pd.date_range(start="2022-01-01", periods=200, freq="D"))] * 3), + "total_price": np.concatenate([rng.integers(1, 1000, size=200) * multiplier for multiplier in range(1, 4)]), + "group": ["Group A"] * 200 + ["Group B"] * 200 + ["Group C"] * 200, + }, +) + +time_plot( + df, + period="M", + group_col="group", + value_col="total_price", + agg_func="sum", + title="Monthly Sales by Customer Group", + y_label="Sales", + legend_title="Customer Group", + source_text="Source: PyRetailScience - Sales FY2024", + move_legend_outside=True, +) +``` diff --git a/docs/assets/images/analysis_modules/time_plot.svg b/docs/assets/images/analysis_modules/time_plot.svg new file mode 100644 index 0000000..f1ebf64 --- /dev/null +++ b/docs/assets/images/analysis_modules/time_plot.svg @@ -0,0 +1 @@ + diff --git a/docs/assets/images/analysis_modules/waterfall.svg b/docs/assets/images/analysis_modules/waterfall.svg index 748cf74..e124b2e 100644 --- a/docs/assets/images/analysis_modules/waterfall.svg +++ b/docs/assets/images/analysis_modules/waterfall.svg @@ -1,1990 +1 @@ - - - - - - - - 2024-07-18T17:11:06.575030 - image/svg+xml - - - Matplotlib v3.8.2, https://matplotlib.org/ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + diff --git a/pyretailscience/standard_graphs.py b/pyretailscience/standard_graphs.py index 8692b64..b0e237d 100644 --- a/pyretailscience/standard_graphs.py +++ b/pyretailscience/standard_graphs.py @@ -28,10 +28,21 @@ def time_plot( legend_title: str | None = None, ax: Axes | None = None, source_text: str | None = None, + move_legend_outside: bool = False, **kwargs: dict[str, any], ) -> SubplotBase: """Plots the value_col over time. + Timeline plots are a fundamental tool for interpreting transactional data within a temporal context. By presenting + data in a chronological sequence, these visualizations reveal patterns and trends that might otherwise remain hidden + in raw numbers, making them essential for both historical analysis and forward-looking insights. They are + particularly useful for: + + - Tracking sales performance across different periods (e.g., daily, weekly, monthly) + - Identifying seasonal patterns or promotional impacts on sales + - Comparing the performance of different product categories or store locations over time + - Visualizing customer behavior trends, such as purchase frequency or average transaction value + Args: df (pd.DataFrame): The dataframe to plot. value_col (str): The column to plot. @@ -47,6 +58,7 @@ def time_plot( the title case of `group_col` ax (Axes, optional): The matplotlib axes object to plot on. Defaults to None. source_text (str, optional): The source text to add to the plot. Defaults to None. + move_legend_outside (bool, optional): Whether to move the legend outside the plot. Defaults to True. 
**kwargs: Additional keyword arguments to pass to the Pandas plot function. Returns: @@ -88,9 +100,13 @@ def time_plot( ax.yaxis.set_major_formatter(lambda x, pos: gu.human_format(x, pos, decimals=decimals)) if show_legend: + legend_bbox_to_anchor = None + if move_legend_outside: + legend_bbox_to_anchor = (1.05, 1) legend = ax.legend( title=gu.not_none(legend_title, group_col.title()), frameon=True, + bbox_to_anchor=legend_bbox_to_anchor, ) legend.get_frame().set_facecolor("white") legend.get_frame().set_edgecolor("white") @@ -301,6 +317,16 @@ def waterfall_plot( ) -> Axes: """Generates a waterfall chart. + Waterfall plots are particularly good for showing how different things add or subtract from a starting number. For + instance, + + - Changes in sales figures from one period to another + - Breakdown of profit margins + - Impact of different product categories on overall revenue + + They are often used to identify key drivers of financial performance, highlight areas for improvement, and communicate + complex data stories to stakeholders in an intuitive manner. + Args: amounts (list[float]): The amounts to plot. labels (list[str]): The labels for the amounts.