diff --git a/.github/workflows/webviz-subsurface.yml b/.github/workflows/webviz-subsurface.yml
index d82adc106..95b133cf9 100644
--- a/.github/workflows/webviz-subsurface.yml
+++ b/.github/workflows/webviz-subsurface.yml
@@ -77,10 +77,10 @@ jobs:
env:
# If you want the CI to (temporarily) run against your fork of the testdada,
# change the value her from "equinor" to your username.
- TESTDATA_REPO_OWNER: equinor
+ TESTDATA_REPO_OWNER: equinor
# If you want the CI to (temporarily) run against another branch than master,
# change the value her from "master" to the relevant branch name.
- TESTDATA_REPO_BRANCH: master
+ TESTDATA_REPO_BRANCH: master
run: |
webviz certificate
git clone --depth 1 --branch $TESTDATA_REPO_BRANCH https://github.com/$TESTDATA_REPO_OWNER/webviz-subsurface-testdata.git
diff --git a/setup.py b/setup.py
index e88f4c63c..8c4e805c4 100644
--- a/setup.py
+++ b/setup.py
@@ -45,6 +45,10 @@
"ReservoirSimulationTimeSeriesRegional = "
+ "webviz_subsurface.plugins:ReservoirSimulationTimeSeriesRegional",
"RftPlotter = webviz_subsurface.plugins:RftPlotter",
+ "ParameterResponseMultipleRegression = "
+ + "webviz_subsurface.plugins:ParameterResponseMultipleRegression",
+ "ParameterResponseParallelCoordinates = "
+ + "webviz_subsurface.plugins:ParameterResponseParallelCoordinates",
]
},
install_requires=[
@@ -57,6 +61,7 @@
"webviz-config>=0.0.55",
"webviz-subsurface-components>=0.0.23",
"xtgeo>=2.8",
+ "statsmodels>=0.11.1",
],
tests_require=TESTS_REQUIRE,
extras_require={"tests": TESTS_REQUIRE},
diff --git a/webviz_subsurface/_utils/response_aggregation.py b/webviz_subsurface/_utils/response_aggregation.py
new file mode 100644
index 000000000..b7883a51c
--- /dev/null
+++ b/webviz_subsurface/_utils/response_aggregation.py
@@ -0,0 +1,49 @@
+from webviz_config.common_cache import CACHE
+import numpy as np
+
+
+@CACHE.memoize(timeout=CACHE.TIMEOUT)
+def filter_and_sum_responses(
+ dframe, ensemble, response, filteroptions=None, aggregation="sum"
+):
+ """Cached wrapper for _filter_and_sum_responses"""
+ return _filter_and_sum_responses(
+ dframe=dframe,
+ ensemble=ensemble,
+ response=response,
+ filteroptions=filteroptions,
+ aggregation=aggregation,
+ )
+
+
+def _filter_and_sum_responses(
+ dframe, ensemble, response, filteroptions=None, aggregation="sum",
+):
+ """Filter response dataframe for the given ensemble
+ and optional filter columns. Returns dataframe grouped and
+ aggregated per realization."""
+ df = dframe.copy()
+ df = df.loc[df["ENSEMBLE"] == ensemble]
+ if filteroptions:
+ for opt in filteroptions:
+ if opt["type"] == "multi" or opt["type"] == "single":
+ if isinstance(opt["values"], list):
+ df = df.loc[df[opt["name"]].isin(opt["values"])]
+ else:
+ if opt["name"] == "DATE" and isinstance(opt["values"], str):
+ df = df.loc[df["DATE"].astype(str) == opt["values"]]
+ else:
+ df = df.loc[df[opt["name"]] == opt["values"]]
+
+ elif opt["type"] == "range":
+ df = df.loc[
+ (df[opt["name"]] >= np.min(opt["values"]))
+ & (df[opt["name"]] <= np.max(opt["values"]))
+ ]
+ if aggregation == "sum":
+ return df.groupby("REAL").sum().reset_index()[["REAL", response]]
+ if aggregation == "mean":
+ return df.groupby("REAL").mean().reset_index()[["REAL", response]]
+ raise ValueError(
+        f"Aggregation of response file specified as '{aggregation}' is invalid."
+ )
diff --git a/webviz_subsurface/plugins/__init__.py b/webviz_subsurface/plugins/__init__.py
index 1196e62d2..a2b850e80 100644
--- a/webviz_subsurface/plugins/__init__.py
+++ b/webviz_subsurface/plugins/__init__.py
@@ -49,7 +49,10 @@
ReservoirSimulationTimeSeriesRegional,
)
from ._rft_plotter.rft_plotter import RftPlotter
-
+from ._parameter_response_multiple_regression import ParameterResponseMultipleRegression
+from ._parameter_response_parallel_coordinates import (
+ ParameterResponseParallelCoordinates,
+)
__all__ = [
"ParameterDistribution",
@@ -72,4 +75,6 @@
"RelativePermeability",
"ReservoirSimulationTimeSeriesRegional",
"RftPlotter",
+ "ParameterResponseMultipleRegression",
+ "ParameterResponseParallelCoordinates",
]
diff --git a/webviz_subsurface/plugins/_parameter_response_multiple_regression.py b/webviz_subsurface/plugins/_parameter_response_multiple_regression.py
new file mode 100644
index 000000000..42ed49b4f
--- /dev/null
+++ b/webviz_subsurface/plugins/_parameter_response_multiple_regression.py
@@ -0,0 +1,1173 @@
+import warnings
+from itertools import combinations
+from pathlib import Path
+
+import dash
+import dash_core_components as dcc
+import dash_html_components as html
+import numpy as np
+import numpy.linalg as la
+import pandas as pd
+import plotly.graph_objects as go
+import statsmodels.api as sm
+import webviz_core_components as wcc
+from dash.dependencies import Input, Output, State
+from dash.exceptions import PreventUpdate
+from dash_table import DataTable
+from dash_table.Format import Format
+from webviz_config import WebvizPluginABC
+from webviz_config.common_cache import CACHE
+from webviz_config.utils import calculate_slider_step
+from webviz_config.webviz_store import webvizstore
+
+from .._datainput.fmu_input import load_csv, load_parameters, load_smry
+from .._utils.response_aggregation import filter_and_sum_responses
+
+
+class ParameterResponseMultipleRegression(WebvizPluginABC):
+ """ Visualizes the results of multiple linear regression of parameters
+and a chosen response using forward selection to limit the number of terms. \
+Interaction terms can be added, up to third order.
+
+Adjusted R-squared is used as the criterion in the forward selection algorithm.
+
+!>Regression models break down when there are duplicate or highly correlated parameters. \
+Please make sure to properly filter your inputs or the model will give answers that are misleading.
+---
+**Three main options for input data: Aggregated, file per realization and read from UNSMRY.**
+
+**Using aggregated data**
+* **`parameter_csv`:** Aggregated csvfile for input parameters with `REAL` and `ENSEMBLE` columns \
+(absolute path or relative to config file).
+* **`response_csv`:** Aggregated csvfile for response parameters with `REAL` and `ENSEMBLE` \
+columns (absolute path or relative to config file).
+
+
+**Using a response file per realization**
+* **`ensembles`:** Which ensembles in `shared_settings` to visualize.
+* **`response_file`:** Local (per realization) csv file for response parameters (Cannot be \
+ combined with `response_csv` and `parameter_csv`).
+
+
+**Using simulation time series data directly from `UNSMRY` files as responses**
+* **`ensembles`:** Which ensembles in `shared_settings` to visualize. The lack of `response_file` \
+ implies that the input data should be time series data from simulation `.UNSMRY` \
+ files, read using `fmu-ensemble`.
+* **`column_keys`:** (Optional) list of simulation vectors to include as responses when reading \
+ from UNSMRY-files in the defined ensembles (default is all vectors). * can be \
+ used as wild card.
+* **`sampling`:** (Optional) sampling frequency when reading simulation data directly from \
+ `.UNSMRY`-files (default is monthly).
+
+?> The `UNSMRY` input method implies that the "DATE" vector will be used as a filter \
+ of type `single` (as defined below under `response_filters`).
+
+
+**Common settings for all input options**
+
+All of these are optional, some have defaults seen in the code snippet below.
+
+* **`response_filters`:** Optional dictionary of responses (columns in csv file or simulation \
+ vectors) that can be used as row filtering before aggregation. \
+ Valid options:
+ * `single`: Dropdown with single selection.
+ * `multi`: Dropdown with multiple selection.
+ * `range`: Slider with range selection.
+* **`response_ignore`:** List of response (columns in csv or simulation vectors) to ignore \
+ (cannot use with response_include).
+* **`response_include`:** List of response (columns in csv or simulation vectors) to include \
+ (cannot use with response_ignore).
+* **`parameter_ignore`:** List of parameters (columns in csv or simulation vectors) to ignore
+* **`aggregation`:** How to aggregate responses per realization. Either `sum` or `mean`.
+
+---
+
+?> Non-numerical (string-based) input parameters and responses are removed.
+
+?> The responses will be aggregated per realization; meaning that if your filters do not reduce \
+the response to a single value per realization in your data, the values will be aggregated \
+according to your defined `aggregation`. If e.g. the response is a form of volume, \
+and the filters are regions (or other subdivisions of the total volume), then `sum` would \
+be a natural aggregation. If on the other hand the response is the pressures in the \
+same volume, aggregation as `mean` over the subdivisions of the same volume \
+would make more sense (though the pressures in this case would not be volume weighted means, \
+and the aggregation would therefore likely be imprecise).
+
+!> Regression models break down when there are **duplicate or highly correlated parameters**. \
+Please make sure to properly filter your inputs or the model will give answers that are misleading.
+
+!> It is **strongly recommended** to keep the data frequency to a regular frequency (like \
+`monthly` or `yearly`). This applies to both csv input and when reading from `UNSMRY` \
+(controlled by the `sampling` key). This is because the statistics are calculated per DATE over \
+all realizations in an ensemble, and the available dates should therefore not differ between \
+individual realizations of an ensemble.
+
+**Using aggregated data**
+
+The `parameter_csv` file must have columns `REAL`, `ENSEMBLE` and the parameter columns.
+
+The `response_csv` file must have columns `REAL`, `ENSEMBLE` and the response columns \
+(and the columns to use as `response_filters`, if that option is used).
+
+
+**Using a response file per realization**
+
+Parameters are extracted automatically from the `parameters.txt` files in the individual
+realizations, using the `fmu-ensemble` library.
+
+The `response_file` must have the response columns (and the columns to use as `response_filters`, \
+if that option is used).
+
+
+**Using simulation time series data directly from `UNSMRY` files as responses**
+
+Parameters are extracted automatically from the `parameters.txt` files in the individual
+realizations, using the `fmu-ensemble` library.
+
+Responses are extracted automatically from the `UNSMRY` files in the individual realizations,
+using the `fmu-ensemble` library.
+
+!> The `UNSMRY` files are auto-detected by `fmu-ensemble` in the `eclipse/model` folder of the \
+individual realizations. You should therefore not have more than one `UNSMRY` file in this \
+folder, to avoid risk of not extracting the right data.
+"""
+
+ # pylint:disable=too-many-arguments
+ # pylint:disable=too-many-lines
+ def __init__(
+ self,
+ app,
+ parameter_csv: Path = None,
+ response_csv: Path = None,
+ ensembles: list = None,
+ response_file: str = None,
+ response_filters: dict = None,
+ response_ignore: list = None,
+ response_include: list = None,
+ parameter_ignore: list = None,
+ column_keys: list = None,
+ sampling: str = "monthly",
+ aggregation: str = "sum",
+ ):
+
+ super().__init__()
+
+ self.parameter_csv = parameter_csv if parameter_csv else None
+ self.response_csv = response_csv if response_csv else None
+ self.response_file = response_file if response_file else None
+ self.response_filters = response_filters if response_filters else {}
+ self.response_ignore = response_ignore if response_ignore else None
+ self.parameter_ignore = parameter_ignore if parameter_ignore else None
+ self.column_keys = column_keys
+ self.time_index = sampling
+ self.aggregation = aggregation
+
+ if response_ignore and response_include:
+ raise ValueError(
+                'Incorrect argument. Either provide "response_include", '
+ '"response_ignore" or neither'
+ )
+ if parameter_csv and response_csv:
+ if ensembles or response_file:
+ raise ValueError(
+ 'Incorrect arguments. Either provide "csv files" or '
+ '"ensembles and response_file".'
+ )
+ self.parameterdf = read_csv(self.parameter_csv)
+ self.responsedf = read_csv(self.response_csv)
+
+ elif ensembles:
+ self.ens_paths = {
+ ens: app.webviz_settings["shared_settings"]["scratch_ensembles"][ens]
+ for ens in ensembles
+ }
+ self.parameterdf = load_parameters(
+ ensemble_paths=self.ens_paths, ensemble_set_name="EnsembleSet"
+ )
+ if self.response_file:
+ self.responsedf = load_csv(
+ ensemble_paths=self.ens_paths,
+ csv_file=response_file,
+ ensemble_set_name="EnsembleSet",
+ )
+ else:
+ self.responsedf = load_smry(
+ ensemble_paths=self.ens_paths,
+ column_keys=self.column_keys,
+ time_index=self.time_index,
+ )
+ self.response_filters["DATE"] = "single"
+ else:
+ raise ValueError(
+                'Incorrect arguments. Either provide "csv files" '
+                'or "ensembles and response_file".'
+ )
+ self.check_runs()
+ self.check_response_filters()
+ if response_ignore:
+ self.responsedf.drop(response_ignore, errors="ignore", axis=1, inplace=True)
+ if response_include:
+ self.responsedf.drop(
+ self.responsedf.columns.difference(
+ [
+ "REAL",
+ "ENSEMBLE",
+ *response_include,
+ *list(response_filters.keys()),
+ ]
+ ),
+ errors="ignore",
+ axis=1,
+ inplace=True,
+ )
+ if parameter_ignore:
+ self.parameterdf.drop(parameter_ignore, axis=1, inplace=True)
+
+ self.theme = app.webviz_settings["theme"]
+ self.parameterdf = self.parameterdf.loc[
+ :, self.parameterdf.apply(pd.Series.nunique) != 1
+ ]
+ self.set_callbacks(app)
+
+ @property
+ def tour_steps(self):
+ """ Adding a "Guided tour" functionality """
+ steps = [
+ {
+ "id": self.uuid("layout"),
+ "content": (
+ "Dashboard displaying the results of a multiple regression of parameters and "
+ "a chosen response using forward selection to limit the number of terms. "
+ "Interaction terms can be added, up to third order. Adjusted R-squared is "
+ "used as the criterion in the forward selection algorithm."
+ ),
+ },
+ {
+ "id": self.uuid("p-values-plot"),
+ "content": (
+ "A plot showing the p-values for the terms from the table ranked from most "
+ "significant to least significant (low to high p-value). A bar is highlighted "
+ "if its corresponding p-value is below 0.05, meaning that the terms are "
+ "likely to be significant. Otherwise, the bars are colored gray."
+ ),
+ },
+ {
+ "id": self.uuid("coefficient-plot"),
+ "content": (
+ "A plot showing the sign of the terms' regression coefficient values by "
+ "arrows pointing up or down, illustrating a positive or a negative coefficient "
+ "respectively. An arrow is highlighted if its corresponding p-value is below "
+ "0.05, meaning that the terms are likely to be significant. Otherwise, "
+ "the arrows are colored gray."
+ ),
+ },
+ {
+ "id": self.uuid("table"),
+ "content": (
+ "A table showing the p-values for a forward selected combination of "
+ "parameters for a chosen response."
+ ),
+ },
+ {"id": self.uuid("ensemble"), "content": ("Select the active ensemble.")},
+ {"id": self.uuid("responses"), "content": ("Select the active response.")},
+ {
+ "id": self.uuid("exclude-include"),
+ "content": (
+ "Select which parameters to include in your model. Exclusive mode lets you "
+                    "remove specific parameters from being considered in the model selection. "
+ "Subset mode lets you pick a subset of parameters to investigate. Parameters "
+ "included here are not guaranteed to be included in the output model."
+ ),
+ },
+ {
+ "id": self.uuid("interaction"),
+ "content": (
+ "Select the depth of the interaction level. 'Off' allows only for the "
+ "parameters in their original state. '2 levels' allow for the product of two "
+ "original parameters. '3 levels' allow for the product of three original "
+ "parameters. This feature allows you to investigate possible feedback effects."
+ ),
+ },
+ {
+ "id": self.uuid("max-params"),
+ "content": (
+ "Choose the maximum number of parameters to include in your model. If "
+ "interaction is active, the number of included parameters is the selected "
+ "value here plus the interaction level. This is to make sure the interaction "
+ "terms have an intuitive interpretation."
+ ),
+ },
+ {
+ "id": self.uuid("force-in"),
+ "content": ("Select parameters to force into the model."),
+ },
+ {
+ "id": self.uuid("submit-button"),
+ "content": (
+ "Press this button to update the table and the plots based on the settings "
+ "above."
+ ),
+ },
+ ]
+ return steps
+
+ @property
+ def responses(self):
+ """ Returns valid responses. Filters out non numerical and filterable columns. """
+ responses = list(
+ self.responsedf.drop(["ENSEMBLE", "REAL"], axis=1)
+ .apply(pd.to_numeric, errors="coerce")
+ .dropna(how="all", axis="columns")
+ .columns
+ )
+ return [p for p in responses if p not in self.response_filters.keys()]
+
+ @property
+ def parameters(self):
+ """ Returns numerical input parameters """
+ parameters = list(
+ self.parameterdf.drop(["ENSEMBLE", "REAL"], axis=1)
+ .apply(pd.to_numeric, errors="coerce")
+ .dropna(how="all", axis="columns")
+ .columns
+ )
+ return parameters
+
+ @property
+ def ensembles(self):
+ """ Returns list of ensembles """
+ return list(self.parameterdf["ENSEMBLE"].unique())
+
+ @property
+ def colors(self):
+ """Dictionary of colors that are frequently used"""
+ return {
+ "default color": self.theme.plotly_theme["layout"]["colorway"][0],
+ "gray": "#606060",
+ "dark gray": "#303030",
+ }
+
+ def check_runs(self):
+ """ Check that input parameters and response files have
+ the same number of runs """
+ for col in ["ENSEMBLE", "REAL"]:
+ if sorted(list(self.parameterdf[col].unique())) != sorted(
+ list(self.responsedf[col].unique())
+ ):
+ raise ValueError("Parameter and response files have different runs")
+
+ def check_response_filters(self):
+ """ Check that provided response filters are valid """
+ if self.response_filters:
+ for col_name, col_type in self.response_filters.items():
+ if col_name not in self.responsedf.columns:
+ raise ValueError(f"{col_name} is not in response file")
+ if col_type not in ["single", "multi", "range"]:
+ raise ValueError(
+ f"Filter type {col_type} for {col_name} is not valid."
+ )
+
+ @property
+ def filter_layout(self):
+ """ Layout to display selectors for response filters """
+ children = []
+ for col_name, col_type in self.response_filters.items():
+ values = list(self.responsedf[col_name].unique())
+ if col_type == "multi":
+ selector = wcc.Select(
+ id=self.uuid(f"filter-{col_name}"),
+ options=[{"label": val, "value": val} for val in values],
+ value=values,
+ multi=True,
+ size=min(20, len(values)),
+ )
+ elif col_type == "single":
+ selector = dcc.Dropdown(
+ id=self.uuid(f"filter-{col_name}"),
+ options=[{"label": val, "value": val} for val in values],
+ value=values[0],
+ multi=False,
+ clearable=False,
+ )
+ children.append(html.Div(children=[html.Label(col_name), selector,]))
+ return children
+
+ @property
+ def control_layout(self):
+ """Layout to select forward selection input, e.g. ensemble, response, settings, etc."""
+ return [
+ html.Div(
+ [
+ html.Div("Ensemble:", style={"font-weight": "bold"}),
+ dcc.Dropdown(
+ id=self.uuid("ensemble"),
+ options=[
+ {"label": ens, "value": ens} for ens in self.ensembles
+ ],
+ clearable=False,
+ value=self.ensembles[0],
+ style={"marginBottom": "20px"},
+ ),
+ ]
+ ),
+ html.Div(
+ [
+ html.Div("Response:", style={"font-weight": "bold"}),
+ dcc.Dropdown(
+ id=self.uuid("responses"),
+ options=[
+ {"label": ens, "value": ens} for ens in self.responses
+ ],
+ clearable=False,
+ value=self.responses[0],
+ style={"marginBottom": "20px"},
+ ),
+ ]
+ ),
+ html.Div(
+ [
+ html.Div(
+ "Parameters:",
+ style={
+ "font-weight": "bold",
+ "display": "inline-block",
+ "margin-right": "10px",
+ },
+ ),
+ dcc.RadioItems(
+ id=self.uuid("exclude-include"),
+ options=[
+ {"label": "Exclusive", "value": "exc"},
+ {"label": "Subset", "value": "inc"},
+ ],
+ value="exc",
+ labelStyle={"display": "inline-block"},
+ style={"fontSize": ".80em"},
+ ),
+ dcc.Dropdown(
+ id=self.uuid("parameter-list"),
+ options=[
+ {"label": ens, "value": ens} for ens in self.parameters
+ ],
+ clearable=True,
+ multi=True,
+ placeholder="",
+ value=[],
+ style={"marginBottom": "20px"},
+ ),
+ ]
+ ),
+ html.Div("Filters:", style={"font-weight": "bold"}),
+ html.Div(children=self.filter_layout),
+ html.Div(
+ [
+ html.Div(
+ "Settings:", style={"font-weight": "bold", "marginTop": "20px"},
+ ),
+ html.Div(
+ "Interaction",
+ style={"display": "inline-block", "margin-right": "10px"},
+ ),
+ dcc.Slider(
+ id=self.uuid("interaction"),
+ min=0,
+ max=2,
+ step=None,
+ marks={0: "Off", 1: "2 levels", 2: "3 levels"},
+ value=0,
+ ),
+ ]
+ ),
+ html.Div(
+ [
+ html.Div(
+ "Max number of parameters",
+ style={"display": "inline-block", "margin-right": "10px"},
+ ),
+ dcc.Dropdown(
+ id=self.uuid("max-params"),
+ options=[
+ {"label": val, "value": val}
+ for val in range(1, min(10, len(self.parameterdf.columns)))
+ ],
+ clearable=False,
+ value=3,
+ ),
+ ]
+ ),
+ html.Div(
+ [
+ html.Div(
+ "Force in",
+ style={"display": "inline-block", "margin-right": "10px"},
+ ),
+ dcc.Dropdown(
+ id=self.uuid("force-in"),
+ clearable=True,
+ multi=True,
+ placeholder="Select parameters to force in",
+ options=[
+ {"label": ens, "value": ens} for ens in self.parameters
+ ],
+ value=[],
+ style={"marginBottom": "20px"},
+ ),
+ ]
+ ),
+ html.Div(
+ style={"display": "grid"},
+ children=[
+ html.Button(
+ id=self.uuid("submit-button"),
+ children="Update",
+ style={
+ "background-color": "LightGray",
+ "cursor": "not-allowed",
+ "border": "none",
+ },
+ disabled=True,
+ )
+ ],
+ ),
+ ]
+
+ @property
+ def layout(self):
+ """ Main layout """
+ return wcc.FlexBox(
+ id=self.uuid("layout"),
+ children=[
+ html.Div(style={"flex": 1}, children=self.control_layout),
+ html.Div(
+ style={"flex": 3},
+ children=[
+ html.Div(
+ id=self.uuid("page-title"),
+ style={
+ "textAlign": "center",
+ "display": "grid",
+ "font-weight": "bold",
+ "fontSize": "1.3em",
+ },
+ ),
+ html.Div(children=[wcc.Graph(id=self.uuid("p-values-plot"))]),
+ html.Div(
+ children=[wcc.Graph(id=self.uuid("coefficient-plot"))]
+ ),
+ html.Label(
+ "Table of parameters and their corresponding p-values",
+ style={"fontSize": ".925em", "textAlign": "center",},
+ ),
+ DataTable(
+ id=self.uuid("table"),
+ sort_action="native",
+ filter_action="native",
+ sort_by=[{"column_id": "P>|t|", "direction": "asc"}],
+ page_action="native",
+ page_size=10,
+ style_cell={"fontSize": ".80em"},
+ ),
+ ],
+ ),
+ ],
+ )
+
+ def get_callback_list(self, func):
+ """ Returns a list with either Inputs or States for multiple regression callback """
+ components = [
+ func(self.uuid("exclude-include"), "value"),
+ func(self.uuid("parameter-list"), "value"),
+ func(self.uuid("ensemble"), "value"),
+ func(self.uuid("responses"), "value"),
+ func(self.uuid("force-in"), "value"),
+ func(self.uuid("interaction"), "value"),
+ func(self.uuid("max-params"), "value"),
+ ]
+ if self.response_filters:
+ for col_name in self.response_filters:
+ components.append(func(self.uuid(f"filter-{col_name}"), "value"))
+ return components
+
+ @property
+ def model_callback_states(self):
+ """ List of states for multiple regression callback """
+ return self.get_callback_list(State)
+
+ @property
+ def model_callback_inputs(self):
+ """ List of inputs for multiple regression callback """
+ inputs = self.get_callback_list(Input)
+ inputs.insert(0, Input(self.uuid("submit-button"), "n_clicks"))
+ return inputs
+
+ def make_response_filters(self, filters):
+ """ Returns a list of active response filters """
+ filteroptions = []
+ if filters:
+ for i, (col_name, col_type) in enumerate(self.response_filters.items()):
+ filteroptions.append(
+ {"name": col_name, "type": col_type, "values": filters[i]}
+ )
+ return filteroptions
+
+ def set_callbacks(self, app):
+ @app.callback(
+ [
+ Output(self.uuid("submit-button"), "disabled"),
+ Output(self.uuid("submit-button"), "style"),
+ ],
+ self.model_callback_inputs,
+ )
+ # pylint: disable=unused-argument
+ # pylint: disable=unused-variable
+ def update_button(
+ n_clicks,
+ exc_inc,
+ parameter_list,
+ ensemble,
+ response,
+ force_in,
+ interaction,
+ max_vars,
+ *filters,
+ ):
+ ctx = dash.callback_context
+ if dash.callback_context.triggered[0]["value"] is None:
+ raise PreventUpdate
+            # if the triggered component is the submit-button
+ if ctx.triggered[0]["prop_id"].split(".")[0] == self.uuid("submit-button"):
+ return (
+ True,
+ {
+ "background-color": "LightGray",
+ "cursor": "not-allowed",
+ "border": "none",
+ },
+ )
+ return (
+ False,
+ {"color": "white", "background-color": self.colors["default color"]},
+ )
+
+ @app.callback(
+ Output(self.uuid("parameter-list"), "placeholder"),
+ [Input(self.uuid("exclude-include"), "value")],
+ )
+ # pylint: disable=unused-variable
+ def update_placeholder(exc_inc):
+            """ Callback to update placeholder text in exclude/subset mode """
+ if exc_inc == "exc":
+ return "Select parameters to exclude"
+ return "Select parameters for subset"
+
+ @app.callback(
+ [
+ Output(self.uuid("force-in"), "options"),
+ Output(self.uuid("force-in"), "value"),
+ ],
+ [
+ Input(self.uuid("parameter-list"), "value"),
+ Input(self.uuid("exclude-include"), "value"),
+ ],
+ [State(self.uuid("force-in"), "value"),],
+ )
+ # pylint: disable=unused-variable
+ def update_force_in(parameter_list, exc_inc, force_in):
+ """ Callback to update options for force in """
+ if dash.callback_context.triggered[0]["value"] is None:
+ raise PreventUpdate
+ if exc_inc == "exc":
+ df = self.parameterdf.drop(
+ columns=["ENSEMBLE", "REAL"] + parameter_list
+ )
+ elif exc_inc == "inc":
+ df = self.parameterdf[parameter_list] if parameter_list else []
+
+ fi_lst = list(df)
+ options = [{"label": fi, "value": fi} for fi in fi_lst]
+ # Add only valid parameters
+ force_in_updated = []
+ for param in force_in:
+ if param in fi_lst:
+ force_in_updated.append(param)
+ return options, force_in_updated
+
+ @app.callback(
+ [
+ Output(self.uuid("table"), "data"),
+ Output(self.uuid("table"), "columns"),
+ Output(self.uuid("page-title"), "children"),
+ Output(self.uuid("p-values-plot"), "figure"),
+ Output(self.uuid("coefficient-plot"), "figure"),
+ ],
+ [Input(self.uuid("submit-button"), "n_clicks")],
+ self.model_callback_states,
+ )
+ # pylint:disable=too-many-locals
+ # pylint: disable=unused-argument
+ def _update_visualizations(
+ n_clicks,
+ exc_inc,
+ parameter_list,
+ ensemble,
+ response,
+ force_in,
+ interaction,
+ max_vars,
+ *filters,
+ ):
+ """ Callback to update the model for multiple regression
+
+ 1. Filters and aggregates response dataframe per realization
+ 2. Filters parameters dataframe on selected ensemble
+ 3. Merge parameter and response dataframe
+ 4. Fit model using forward stepwise regression, with or without interactions
+ 5. Generate table and plots
+ """
+ filteroptions = self.make_response_filters(filters)
+ responsedf = filter_and_sum_responses(
+ self.responsedf,
+ ensemble,
+ response,
+ filteroptions=filteroptions,
+ aggregation=self.aggregation,
+ )
+ if exc_inc == "exc":
+ parameterdf = self.parameterdf.drop(parameter_list, axis=1)
+ elif exc_inc == "inc":
+ parameterdf = self.parameterdf[["ENSEMBLE", "REAL"] + parameter_list]
+
+ parameterdf = parameterdf.loc[self.parameterdf["ENSEMBLE"] == ensemble]
+ df = pd.merge(responsedf, parameterdf, on=["REAL"]).drop(
+ columns=["REAL", "ENSEMBLE"]
+ )
+
+ if exc_inc == "inc" and not parameter_list:
+ return (
+ [{"e": ""}],
+ [{"name": "", "id": "e"}],
+ "Please select parameters to be included in the model",
+ {
+ "layout": {
+                        "title": "Please select parameters to include in the model<br>"
+ }
+ },
+ {
+ "layout": {
+                        "title": "Please select parameters to include in the model<br>"
+ }
+ },
+ )
+
+ result = gen_model(
+ df,
+ response,
+ force_in=force_in,
+ max_vars=max_vars,
+ interaction_degree=interaction,
+ )
+ if not result or result.model.fit().df_model == 0:
+ return (
+ [{"e": ""}],
+ [{"name": "", "id": "e"}],
+ "Cannot calculate fit for given selection. Select a different "
+ "response or filter setting",
+ {
+ "layout": {
+                        "title": "Cannot calculate fit for given selection<br>"
+                        "Select a different response or filter setting."
+ }
+ },
+ {
+ "layout": {
+                        "title": "Cannot calculate fit for given selection<br>"
+                        "Select a different response or filter setting."
+ }
+ },
+ )
+ # Generate table
+ table = result.model.fit().summary2().tables[1].drop("Intercept")
+ table.drop(
+ ["Std.Err.", "Coef.", "t", "[0.025", "0.975]"], axis=1, inplace=True
+ )
+ table.index.name = "Parameter"
+ table.reset_index(inplace=True)
+ columns = [
+ {"name": i, "id": i, "type": "numeric", "format": Format(precision=4),}
+ for i in table.columns
+ ]
+ data = table.to_dict("rows")
+
+ # Get p-values for plot
+ p_sorted = result.pvalues.sort_values().drop("Intercept")
+
+ # Get coefficients for plot
+ coeff_sorted = result.params.sort_values(ascending=False).drop("Intercept")
+
+ return (
+ data,
+ columns,
+ f"Multiple regression with {response} as response",
+ make_p_values_plot(p_sorted, self.theme, self.colors),
+ make_arrow_plot(coeff_sorted, p_sorted, self.theme, self.colors),
+ )
+
+ def add_webvizstore(self):
+ if self.parameter_csv and self.response_csv:
+ return [
+ (read_csv, [{"csv_file": self.parameter_csv,}],),
+ (read_csv, [{"csv_file": self.response_csv,}],),
+ ]
+ return [
+ (
+ load_parameters,
+ [
+ {
+ "ensemble_paths": self.ens_paths,
+ "ensemble_set_name": "EnsembleSet",
+ }
+ ],
+ ),
+ (
+ load_csv,
+ [
+ {
+ "ensemble_paths": self.ens_paths,
+ "csv_file": self.response_file,
+ "ensemble_set_name": "EnsembleSet",
+ }
+ ],
+ )
+ if self.response_file
+ else (
+ load_smry,
+ [
+ {
+ "ensemble_paths": self.ens_paths,
+ "column_keys": self.column_keys,
+ "time_index": self.time_index,
+ }
+ ],
+ ),
+ ]
+
+
+@CACHE.memoize(timeout=CACHE.TIMEOUT)
+def gen_model(
+ df: pd.DataFrame,
+ response: str,
+ max_vars: int = 9,
+ force_in: list = None,
+ interaction_degree: bool = False,
+):
+ """ Wrapper for model selection algorithm. """
+ if interaction_degree:
+ df = _gen_interaction_df(df, response, interaction_degree + 1)
+ return forward_selected(data=df, resp=response, force_in=force_in, maxvars=max_vars)
+
+
+@CACHE.memoize(timeout=CACHE.TIMEOUT)
+def _gen_interaction_df(df: pd.DataFrame, response: str, degree: int = 2):
+ newdf = df.copy()
+
+ name_combinations = []
+ for i in range(1, degree + 1):
+ name_combinations += [
+ " × ".join(combination)
+ for combination in combinations(newdf.drop(columns=response).columns, i)
+ ]
+ for name in name_combinations:
+ if name.split(" × "):
+ newdf[name] = newdf.filter(items=name.split(" × ")).product(axis=1)
+ return newdf
+
+
+# pylint:disable=too-many-locals
+def forward_selected(
+ data: pd.DataFrame, resp: str, force_in: list = None, maxvars: int = 5
+):
+ """ Forward model selection algorithm
+
+ Returns Statsmodels RegressionResults object.
+    The algorithm is a modified standard forward selection algorithm.
+ The selection criterion chosen is adjusted R squared.
+ See this link for more information about the algorithm:
+ https://en.wikipedia.org/wiki/Stepwise_regression
+
+ Steps of the algorithm:
+ - Initialize values
+ - While there are parameters left and the last model was the best model yet and the
+      parameter limit isn't reached, for every parameter not chosen yet:
+ 1. If it is an interaction parameter, add the base parameters to the model.
+ 2. Create a model matrix, fit the model and calculate selection criterion for each
+ remaining parameter.
+ 3. Pick the best parameter and repeat with remaining parameters until we satisfy an
+ exit condition.
+ 4. Finally fit a Statsmodels regression and return the results.
+
+ Exit conditions:
+ - No parameters in remaining.
+ - The last model was not the best model.
+ - Hit cap on maximum parameters.
+ - We are about to add more parameters than there are observations.
+ """
+
+ # Initialize values for use in algorithm (sst is the total sum of squares)
+ response = data[resp].to_numpy(dtype="float32")
+ # Check for constant response
+ if np.all(response == response[0]):
+ return None
+ sst = np.sum((response - np.mean(response)) ** 2)
+ remaining = set(data.columns).difference(set(force_in + [resp]))
+ selected = force_in
+ current_score, best_new_score = 0.0, 0.0
+ while remaining and current_score == best_new_score and len(selected) < maxvars:
+ scores_with_candidates = []
+ for candidate in remaining:
+ if " × " in candidate:
+ current_model = (
+ selected.copy()
+ + [candidate]
+ + list(set(candidate.split(" × ")).difference(set(selected)))
+ )
+ else:
+ current_model = selected.copy() + [candidate]
+ parameters = data.filter(items=current_model).to_numpy(dtype="float64")
+ num_parameters = parameters.shape[1]
+ parameters = np.append(parameters, np.ones((len(response), 1)), axis=1)
+
+ # Fit model
+ try:
+ beta = la.inv(parameters.T @ parameters) @ parameters.T @ response
+ except la.LinAlgError:
+ # This clause lets us skip singluar and other non-valid model matricies.
+ continue
+
+ if len(response) - num_parameters - 1 < 1:
+ # The exit condition means adding this parameter would add more parameters than
+ # observations. This causes infinite variance in the model so we return the current
+ # best model
+
+ model_df = data.filter(items=selected)
+ model_df["Intercept"] = np.ones((len(response), 1))
+ model_df["response"] = response
+
+ return _model_warnings(model_df)
+
+ f_vec = beta @ parameters.T
+ ss_res = np.sum((f_vec - np.mean(response)) ** 2)
+
+ r_2_adj = 1 - (1 - (ss_res / sst)) * (
+ (len(response) - 1) / (len(response) - num_parameters - 1)
+ )
+ scores_with_candidates.append((r_2_adj, candidate))
+
+ # If the best parameter is in an interaction, add all base parameters
+ scores_with_candidates.sort(key=lambda x: x[0])
+ best_new_score, best_candidate = scores_with_candidates.pop()
+ if current_score < best_new_score:
+ if " × " in best_candidate:
+ for base_parameter in best_candidate.split(" × "):
+ if base_parameter in remaining:
+ remaining.remove(base_parameter)
+ if base_parameter not in selected:
+ selected.append(base_parameter)
+
+ remaining.remove(best_candidate)
+ selected.append(best_candidate)
+ current_score = best_new_score
+ # Finally fit a statsmodel from the selected parameters
+ model_df = data.filter(items=selected)
+ model_df["Intercept"] = np.ones((len(response), 1))
+ model_df["response"] = response
+ return _model_warnings(model_df)
+
+
def _model_warnings(design_matrix: pd.DataFrame):
    """Fit an OLS model on the design matrix, returning None on RuntimeWarning.

    RuntimeWarnings are escalated to errors so the graphics can indicate that
    the model failed for the current input instead of showing a bad fit;
    UserWarnings are silenced.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings("error", category=RuntimeWarning)
        warnings.filterwarnings("ignore", category=UserWarning)
        try:
            endog = design_matrix["response"]
            exog = design_matrix.drop(columns="response")
            model = sm.OLS(endog, exog).fit()
        except RuntimeWarning as error:
            print("error: ", error)
            return None
    return model
+
+
def make_p_values_plot(p_sorted, theme, colors):
    """Make a bar plot of p-values, sorted by value.

    Bars for parameters that are statistically significant at the 0.05 level
    get the default theme color, the rest are gray. A horizontal reference
    line marks the 0.05 threshold.
    """
    p_values = p_sorted.values
    parameters = p_sorted.index
    fig = go.Figure()
    fig.add_trace(
        {
            # "<br>" wraps long interaction labels onto a second line.
            "x": [param.replace(" × ", "<br>× ") for param in parameters],
            "y": p_values,
            "type": "bar",
            "marker": {
                "color": [
                    # 0.05: upper limit for a statistically significant p-value
                    colors["default color"] if val < 0.05 else colors["gray"]
                    for val in p_values
                ]
            },
        }
    )
    fig.update_traces(
        hovertemplate=[
            "P-value: " + str(format(pval, ".4g")) + "" for pval in p_values
        ]
    )
    # Horizontal reference line at the 0.05 significance level.
    fig.add_shape(
        {
            "type": "line",
            "y0": 0.05,
            "y1": 0.05,
            "x0": -0.5,
            "x1": len(p_values) - 0.5,
            "xref": "x",
            "line": {"color": colors["dark gray"], "width": 1.5},
        }
    )
    fig.add_annotation(
        x=len(p_values) - 0.2, y=0.05, text="P-value<br>= 0.05", showarrow=False
    )
    fig = fig.to_dict()
    fig["layout"].update(
        barmode="relative",
        height=500,
        hovermode="x",
        title=dict(
            text="P-values for the parameters. Value lower than 0.05 indicates "
            "statistical significance",
            x=0.5,
        ),
    )
    fig["layout"] = theme.create_themed_layout(fig["layout"])
    return fig
+
+
def make_arrow_plot(coeff_sorted, p_sorted, theme, colors):
    """Make an arrow plot for the coefficients.

    Parameters are laid out along a horizontal axis ordered by p-value;
    arrows point upwards for positive coefficients and downwards for negative
    ones. Significant parameters (p < 0.05) get the default theme color.
    """
    params_to_coefs = dict(coeff_sorted)
    p_values = p_sorted.values
    parameters = p_sorted.index
    coeff_vals = list(map(params_to_coefs.get, parameters))
    centre_dist = len(parameters) / 3

    # Array with len(parameters) points for the x-axis, centered about x=1,
    # with domain [0, 2]
    x = (
        [1]
        if len(parameters) == 1
        else np.linspace(
            max(1 - centre_dist, 0), min(1 + centre_dist, 2), num=len(parameters),
        )
    )
    y = np.zeros(len(x))
    fig = go.Figure(
        go.Scatter(
            x=x,
            y=y,
            opacity=0,
            marker=dict(
                # np.int was deprecated in NumPy 1.20 and removed in 1.24;
                # the builtin int is the equivalent dtype.
                color=(p_values < 0.05).astype(
                    int
                ),  # 0.05: upper limit for stat.sig. p-value
                colorscale=[(0, colors["gray"]), (1, colors["default color"])],
                cmin=0,
                cmax=1,
            ),
        )
    )
    fig.update_traces(
        hovertemplate=[
            "P-value: " + str(format(pval, ".4g")) + "" for pval in p_values
        ]
    )
    # Arrows are drawn and added to the plot.
    # Parameters with positive coefficients have arrows pointing upwards,
    # and vice versa.
    for i, sign in enumerate(np.sign(coeff_vals)):
        x_coordinate = x[i]
        fig.add_shape(
            type="path",
            path=f" M {x_coordinate-0.025} 0 "
            f" L {x_coordinate-0.025} {sign*0.06} "
            f" L {x_coordinate-0.07} {sign*0.06} "
            f" L {x_coordinate} {sign*0.08} "
            f" L {x_coordinate+0.07} {sign*0.06} "
            f" L {x_coordinate+0.025} {sign*0.06} "
            f" L {x_coordinate+0.025} 0 ",
            fillcolor=colors["default color"] if p_values[i] < 0.05 else colors["gray"],
            line_width=0,
        )
    # Horizontal axis line with an arrow head at its right end.
    fig.add_shape(
        type="line",
        x0=-0.1,
        y0=0,
        x1=2 + 0.1,
        y1=0,
        line=dict(color=colors["dark gray"], width=1.5),
    )
    fig.add_shape(
        type="path",
        path=f" M {2+0.12} 0 L {2+0.1} -0.0035 L {2+0.1} 0.0035 Z",
        line_color=colors["dark gray"],
        line_width=1.5,
    )
    # Description of the horizontal axis, placed 0.35 units rightwards from
    # the end of the plot domain.
    fig.add_annotation(x=2 + 0.35, y=0, text="Increasing<br>p-value", showarrow=False)
    fig = fig.to_dict()
    fig["layout"].update(
        barmode="relative",
        height=500,
        hovermode="x",
        title=dict(
            text="Parameters impact (increase or decrease) on response and their significance",
            x=0.5,
        ),
        yaxis=dict(
            range=[-0.08, 0.08], title="", showticklabels=False
        ),  # 0.08: arrow height
        xaxis=dict(
            title="",
            ticktext=[param.replace(" × ", "<br>× ") for param in parameters],
            tickvals=x,
        ),
    )
    fig["layout"] = theme.create_themed_layout(fig["layout"])
    return fig
+
+
def make_range_slider(domid, values, col_name):
    """Return a dcc.RangeSlider spanning the values of a numerical filter column.

    Args:
        domid: Component id for the slider.
        values: Series of column values.
        col_name: Column name, used in the error message.

    Raises:
        ValueError: if the column cannot be interpreted as numerical.
    """
    try:
        values.apply(pd.to_numeric, errors="raise")
    except ValueError as exc:
        # Chain the original conversion error for easier debugging.
        raise ValueError(
            f"Cannot calculate filter range for {col_name}. Ensure that it is a numerical column."
        ) from exc
    return dcc.RangeSlider(
        id=domid,
        min=values.min(),
        max=values.max(),
        step=calculate_slider_step(
            min_value=values.min(),
            max_value=values.max(),
            steps=len(list(values.unique())) - 1,
        ),
        value=[values.min(), values.max()],
        marks={
            str(values.min()): {"label": f"{values.min():.2f}"},
            str(values.max()): {"label": f"{values.max():.2f}"},
        },
    )
+
+
@CACHE.memoize(timeout=CACHE.TIMEOUT)
@webvizstore
def read_csv(csv_file) -> pd.DataFrame:
    """Read a csv file into a DataFrame (memoized, and registered with
    webvizstore so the data can be packaged for portable runs)."""
    return pd.read_csv(csv_file, index_col=False)
diff --git a/webviz_subsurface/plugins/_parameter_response_parallel_coordinates.py b/webviz_subsurface/plugins/_parameter_response_parallel_coordinates.py
new file mode 100644
index 000000000..29d376efe
--- /dev/null
+++ b/webviz_subsurface/plugins/_parameter_response_parallel_coordinates.py
@@ -0,0 +1,526 @@
+from pathlib import Path
+import pandas as pd
+import dash_html_components as html
+import dash_core_components as dcc
+import webviz_core_components as wcc
+from dash.dependencies import Input, Output
+from webviz_config.webviz_store import webvizstore
+from webviz_config.common_cache import CACHE
+from webviz_config import WebvizPluginABC
+from .._datainput.fmu_input import load_parameters, load_csv, load_smry
+from .._utils.response_aggregation import filter_and_sum_responses
+
+
+class ParameterResponseParallelCoordinates(WebvizPluginABC):
+
+ """
+ Visualizes parameters in a parallel parameter plot, colored by the value of the response.
+ Helpful for seeing trends in the relation of the parameters and the response.
+---
+**Three main options for input data: Aggregated, file per realization and read from UNSMRY.**
+
+**Using aggregated data**
+* **`parameter_csv`:** Aggregated csvfile for input parameters with `REAL` and `ENSEMBLE` columns \
+(absolute path or relative to config file).
+* **`response_csv`:** Aggregated csvfile for response parameters with `REAL` and `ENSEMBLE` \
+columns (absolute path or relative to config file).
+
+
+**Using a response file per realization**
+* **`ensembles`:** Which ensembles in `shared_settings` to visualize.
+* **`response_file`:** Local (per realization) csv file for response parameters (Cannot be \
+ combined with `response_csv` and `parameter_csv`).
+
+
+**Using simulation time series data directly from `UNSMRY` files as responses**
+* **`ensembles`:** Which ensembles in `shared_settings` to visualize. The lack of `response_file` \
+ implies that the input data should be time series data from simulation `.UNSMRY` \
+ files, read using `fmu-ensemble`.
+* **`column_keys`:** (Optional) list of simulation vectors to include as responses when reading \
+ from UNSMRY-files in the defined ensembles (default is all vectors). * can be \
+ used as wild card.
+* **`sampling`:** (Optional) sampling frequency when reading simulation data directly from \
+ `.UNSMRY`-files (default is monthly).
+
+?> The `UNSMRY` input method implies that the "DATE" vector will be used as a filter \
+ of type `single` (as defined below under `response_filters`).
+
+
+**Common settings for all input options**
+
+All of these are optional, some have defaults seen in the code snippet below.
+
+* **`response_filters`:** Optional dictionary of responses (columns in csv file or simulation \
+ vectors) that can be used as row filtering before aggregation. \
+ Valid options:
+ * `single`: Dropdown with single selection.
+ * `multi`: Dropdown with multiple selection.
+ * `range`: Slider with range selection.
+* **`response_ignore`:** List of response (columns in csv or simulation vectors) to ignore \
+ (cannot use with response_include).
+* **`response_include`:** List of response (columns in csv or simulation vectors) to include \
+ (cannot use with response_ignore).
+* **`aggregation`:** How to aggregate responses per realization. Either `sum` or `mean`.
+
+
+---
+
+?> Non-numerical (string-based) input parameters and responses are removed.
+
+?> The responses will be aggregated per realization; meaning that if your filters do not reduce \
+the response to a single value per realization in your data, the values will be aggregated \
+according to your defined `aggregation`. If e.g. the response is a form of volume, \
+and the filters are regions (or other subdivisions of the total volume), then `sum` would \
+be a natural aggregation. If on the other hand the response is the pressures in the \
+same volume, aggregation as `mean` over the subdivisions of the same volume \
+would make more sense (though the pressures in this case would not be volume weighted means, \
+and the aggregation would therefore likely be imprecise).
+
+!> It is **strongly recommended** to keep the data frequency to a regular frequency (like \
+`monthly` or `yearly`). This applies to both csv input and when reading from `UNSMRY` \
+(controlled by the `sampling` key). This is because the statistics are calculated per DATE over \
+all realizations in an ensemble, and the available dates should therefore not differ between \
+individual realizations of an ensemble.
+
+**Using aggregated data**
+
+The `parameter_csv` file must have columns `REAL`, `ENSEMBLE` and the parameter columns.
+
+The `response_csv` file must have columns `REAL`, `ENSEMBLE` and the response columns \
+(and the columns to use as `response_filters`, if that option is used).
+
+
+**Using a response file per realization**
+
+Parameters are extracted automatically from the `parameters.txt` files in the individual
+realizations, using the `fmu-ensemble` library.
+
+The `response_file` must have the response columns (and the columns to use as `response_filters`, \
+if that option is used).
+
+
+**Using simulation time series data directly from `UNSMRY` files as responses**
+
+Parameters are extracted automatically from the `parameters.txt` files in the individual
+realizations, using the `fmu-ensemble` library.
+
+Responses are extracted automatically from the `UNSMRY` files in the individual realizations,
+using the `fmu-ensemble` library.
+
+!> The `UNSMRY` files are auto-detected by `fmu-ensemble` in the `eclipse/model` folder of the \
+individual realizations. You should therefore not have more than one `UNSMRY` file in this \
+folder, to avoid risk of not extracting the right data."""
+
+ # pylint:disable=too-many-arguments
+ def __init__(
+ self,
+ app,
+ parameter_csv: Path = None,
+ response_csv: Path = None,
+ ensembles: list = None,
+ response_file: str = None,
+ response_filters: dict = None,
+ response_ignore: list = None,
+ response_include: list = None,
+ parameter_ignore: list = None,
+ column_keys: list = None,
+ sampling: str = "monthly",
+ aggregation: str = "sum",
+ ):
+
+ super().__init__()
+
+ self.parameter_csv = parameter_csv if parameter_csv else None
+ self.response_csv = response_csv if response_csv else None
+ self.response_file = response_file if response_file else None
+ self.response_filters = response_filters if response_filters else {}
+ self.response_ignore = response_ignore if response_ignore else None
+ self.parameter_ignore = parameter_ignore if parameter_ignore else None
+ self.column_keys = column_keys
+ self.time_index = sampling
+ self.aggregation = aggregation
+
+ if response_ignore and response_include:
+ raise ValueError(
+ 'Incorrent argument. Either provide "response_include", '
+ '"response_ignore" or neither'
+ )
+ if parameter_csv and response_csv:
+ if ensembles or response_file:
+ raise ValueError(
+ 'Incorrect arguments. Either provide "csv files" or '
+ '"ensembles and response_file".'
+ )
+ self.parameterdf = read_csv(self.parameter_csv)
+ self.responsedf = read_csv(self.response_csv)
+
+ elif ensembles:
+ self.ens_paths = {
+ ens: app.webviz_settings["shared_settings"]["scratch_ensembles"][ens]
+ for ens in ensembles
+ }
+ self.parameterdf = load_parameters(
+ ensemble_paths=self.ens_paths, ensemble_set_name="EnsembleSet"
+ )
+ if self.response_file:
+ self.responsedf = load_csv(
+ ensemble_paths=self.ens_paths,
+ csv_file=response_file,
+ ensemble_set_name="EnsembleSet",
+ )
+ else:
+ self.responsedf = load_smry(
+ ensemble_paths=self.ens_paths,
+ column_keys=self.column_keys,
+ time_index=self.time_index,
+ )
+ self.response_filters["DATE"] = "single"
+ else:
+ raise ValueError(
+ 'Incorrect arguments.\
+ Either provide "csv files" or "ensembles and response_file".'
+ )
+ self.check_runs()
+ self.check_response_filters()
+ if response_ignore:
+ self.responsedf.drop(response_ignore, errors="ignore", axis=1, inplace=True)
+ if response_include:
+ self.responsedf.drop(
+ self.responsedf.columns.difference(
+ [
+ "REAL",
+ "ENSEMBLE",
+ *response_include,
+ *list(response_filters.keys()),
+ ]
+ ),
+ errors="ignore",
+ axis=1,
+ inplace=True,
+ )
+ if parameter_ignore:
+ self.parameterdf.drop(parameter_ignore, axis=1, inplace=True)
+
+ self.plotly_theme = app.webviz_settings["theme"].plotly_theme
+ self.set_callbacks(app)
+
+ @property
+ def tour_steps(self):
+ steps = [
+ {
+ "id": self.uuid("layout"),
+ "content": (
+ "Dashboard for parallel parameters plot"
+ "colored by the value of a response"
+ ),
+ },
+ {
+ "id": self.uuid("parameters"),
+ "content": (
+ "Lets you control what parameters to include in your plot. \n"
+ + "There are two modes, exclusive and subset: \n"
+ + "- Exclusive mode lets you remove specific parameters\n\n"
+ + "- Subset mode lets you pick a subset of parameters \n"
+ ),
+ },
+ {
+ "id": self.uuid("parallel-coords-plot"),
+ "content": (
+ "Plot showing the values of all the selected parameters at once."
+ ),
+ },
+ {"id": self.uuid("ensemble"), "content": ("Select the active ensemble."),},
+ {"id": self.uuid("responses"), "content": ("Select the active response."),},
+ {
+ "id": self.uuid("exclude_include"),
+ "content": (
+ "Choose if the parameter selector should be inclusive or exclusive"
+ ),
+ },
+ ]
+ return steps
+
+ @property
+ def responses(self):
+ """Returns valid responses. Filters out non numerical columns,
+ and filterable columns."""
+ responses = list(
+ self.responsedf.drop(["ENSEMBLE", "REAL"], axis=1)
+ .apply(pd.to_numeric, errors="coerce")
+ .dropna(how="all", axis="columns")
+ .columns
+ )
+ return [p for p in responses if p not in self.response_filters.keys()]
+
+ @property
+ def parameters(self):
+ """Returns numerical input parameters"""
+ parameters = list(
+ self.parameterdf.drop(["ENSEMBLE", "REAL"], axis=1)
+ .apply(pd.to_numeric, errors="coerce")
+ .dropna(how="all", axis="columns")
+ .columns
+ )
+ return parameters
+
+ @property
+ def ensembles(self):
+ """Returns list of ensembles"""
+ return list(self.parameterdf["ENSEMBLE"].unique())
+
+ def check_runs(self):
+ """Check that input parameters and response files have
+ the same number of runs"""
+ for col in ["ENSEMBLE", "REAL"]:
+ if sorted(list(self.parameterdf[col].unique())) != sorted(
+ list(self.responsedf[col].unique())
+ ):
+ raise ValueError("Parameter and response files have different runs")
+
+ def check_response_filters(self):
+ """Check that provided response filters are valid"""
+ if self.response_filters:
+ for col_name, col_type in self.response_filters.items():
+ if col_name not in self.responsedf.columns:
+ raise ValueError(f"{col_name} is not in response file")
+ if col_type not in ["single", "multi", "range"]:
+ raise ValueError(
+ f"Filter type {col_type} for {col_name} is not valid."
+ )
+
+ @property
+ def filter_layout(self):
+ """Layout to display selectors for response filters"""
+ children = []
+ for col_name, col_type in self.response_filters.items():
+ values = list(self.responsedf[col_name].unique())
+ if col_type == "multi":
+ selector = wcc.Select(
+ id=self.uuid(f"filter-{col_name}"),
+ options=[{"label": val, "value": val} for val in values],
+ value=values,
+ multi=True,
+ size=min(20, len(values)),
+ )
+ elif col_type == "single":
+ selector = dcc.Dropdown(
+ id=self.uuid(f"filter-{col_name}"),
+ options=[{"label": val, "value": val} for val in values],
+ value=values[0],
+ multi=False,
+ clearable=False,
+ )
+ children.append(html.Div(children=[html.Label(col_name), selector,]))
+ return children
+
    @property
    def control_layout(self):
        """Layout for the control column: ensemble and response dropdowns,
        parameter selection mode and list, and the response filters."""
        return [
            # Ensemble selector
            html.Div(
                [
                    html.Div("Ensemble:", style={"font-weight": "bold"}),
                    dcc.Dropdown(
                        id=self.uuid("ensemble"),
                        options=[
                            {"label": ens, "value": ens} for ens in self.ensembles
                        ],
                        clearable=False,
                        value=self.ensembles[0],
                        style={"marginBottom": "20px"},
                    ),
                ]
            ),
            # Response selector
            html.Div(
                [
                    html.Div("Response:", style={"font-weight": "bold"}),
                    dcc.Dropdown(
                        id=self.uuid("responses"),
                        options=[
                            {"label": ens, "value": ens} for ens in self.responses
                        ],
                        clearable=False,
                        value=self.responses[0],
                        style={"marginBottom": "20px"},
                    ),
                ]
            ),
            # Parameter selection mode: exclusive (drop listed parameters)
            # or subset (keep only listed parameters)
            html.Div(
                [
                    html.Div(
                        "Parameters:",
                        id=self.uuid("parameters"),
                        style={
                            "font-weight": "bold",
                            "display": "inline-block",
                            "margin-right": "10px",
                        },
                    ),
                    dcc.RadioItems(
                        id=self.uuid("exclude_include"),
                        options=[
                            {"label": "Exclusive mode", "value": "exc"},
                            {"label": "Subset mode", "value": "inc"},
                        ],
                        value="exc",
                        labelStyle={"display": "inline-block"},
                        style={"fontSize": ".80em"},
                    ),
                ]
            ),
            # Parameter list used by the selected mode
            html.Div(
                [
                    wcc.Select(
                        id=self.uuid("parameter-list"),
                        options=[
                            {"label": ens, "value": ens} for ens in self.parameters
                        ],
                        multi=True,
                        size=10,
                        value=[],
                        style={"marginBottom": "20px"},
                    ),
                ]
            ),
            # Response filters (see filter_layout)
            html.Div("Filters:", style={"font-weight": "bold"}),
            html.Div(children=self.filter_layout),
        ]
+
+ @property
+ def layout(self):
+ """Main layout"""
+ return wcc.FlexBox(
+ id=self.uuid("layout"),
+ children=[
+ html.Div(style={"flex": 1}, children=self.control_layout),
+ html.Div(
+ style={"flex": 3},
+ children=wcc.Graph(id=self.uuid("parallel-coords-plot")),
+ ),
+ ],
+ )
+
+ @property
+ def parallel_coords_callback_inputs(self):
+ """List of Inputs for parallel parameters callback"""
+ inputs = [
+ Input(self.uuid("exclude_include"), "value"),
+ Input(self.uuid("parameter-list"), "value"),
+ Input(self.uuid("ensemble"), "value"),
+ Input(self.uuid("responses"), "value"),
+ ]
+ if self.response_filters:
+ for col_name in self.response_filters:
+ inputs.append(Input(self.uuid(f"filter-{col_name}"), "value"))
+ return inputs
+
+ def make_response_filters(self, filters):
+ """Returns a list of active response filters"""
+ filteroptions = []
+ if filters:
+ for i, (col_name, col_type) in enumerate(self.response_filters.items()):
+ filteroptions.append(
+ {"name": col_name, "type": col_type, "values": filters[i]}
+ )
+ return filteroptions
+
+ def set_callbacks(self, app):
+
+ """Set callback for parallel coordinates plot"""
+
+ @app.callback(
+ Output(self.uuid("parallel-coords-plot"), "figure"),
+ self.parallel_coords_callback_inputs,
+ )
+ def _update_parallel_coordinate_plot(
+ exc_inc, parameter_list, ensemble, response, *filters
+ ):
+ """
+ Callback to update the parallel coordinates plot
+ 1. Filter Dataframes according to chosen filter options
+ 5. Generate updated parallel parameters plot.
+ """
+
+ # filtering
+ filteroptions = self.make_response_filters(filters)
+ responsedf = filter_and_sum_responses(
+ self.responsedf,
+ ensemble,
+ response,
+ filteroptions=filteroptions,
+ aggregation=self.aggregation,
+ )
+ if exc_inc == "exc":
+ parameterdf = self.parameterdf.drop(parameter_list, axis=1)
+ elif exc_inc == "inc":
+ parameterdf = self.parameterdf[["ENSEMBLE", "REAL"] + parameter_list]
+
+ parameterdf = parameterdf.loc[self.parameterdf["ENSEMBLE"] == ensemble]
+ df = pd.merge(responsedf, parameterdf, on=["REAL"]).drop(
+ columns=["REAL", "ENSEMBLE"]
+ )
+
+ # plot generation
+ pallete = self.plotly_theme["layout"]["colorway"]
+ colmap = [(0, pallete[0]), (1, pallete[2])]
+ dims = [{"label": param, "values": df[param]} for param in df]
+ data = [
+ {
+ "type": "parcoords",
+ "line": {
+ "color": df[response],
+ "colorscale": colmap,
+ "showscale": True,
+ "colorbar": {
+ "title": response,
+ "xanchor": "right",
+ "x": -0.02,
+ },
+ },
+ "dimensions": dims,
+ "labelangle": 45,
+ "labelside": "bottom",
+ }
+ ]
+ layout = {}
+ layout.update(self.plotly_theme["layout"])
+ # Ensure sufficient spacing between each dimension and margin for labels
+ width = len(dims) * 100 + 250
+ layout.update(
+ {"width": width, "height": 1200, "margin": {"b": 740, "t": 30}}
+ )
+ return {"data": data, "layout": layout}
+
+ def add_webvizstore(self):
+ if self.parameter_csv and self.response_csv:
+ return [
+ (read_csv, [{"csv_file": self.parameter_csv,}],),
+ (read_csv, [{"csv_file": self.response_csv,}],),
+ ]
+ return [
+ (
+ load_parameters,
+ [
+ {
+ "ensemble_paths": self.ens_paths,
+ "ensemble_set_name": "EnsembleSet",
+ }
+ ],
+ ),
+ (
+ load_csv,
+ [
+ {
+ "ensemble_paths": self.ens_paths,
+ "csv_file": self.response_file,
+ "ensemble_set_name": "EnsembleSet",
+ }
+ ],
+ ),
+ ]
+
+
@CACHE.memoize(timeout=CACHE.TIMEOUT)
@webvizstore
def read_csv(csv_file) -> pd.DataFrame:
    """Read a csv file into a DataFrame (memoized, and registered with
    webvizstore so the data can be packaged for portable runs)."""
    return pd.read_csv(csv_file, index_col=False)