From be8e0dd5a947acae6258489316780320650d9c0d Mon Sep 17 00:00:00 2001 From: JosteinGj Date: Wed, 1 Jul 2020 10:24:02 +0200 Subject: [PATCH 1/5] added basic working regression plugin --- setup.py | 2 + webviz_subsurface/plugins/__init__.py | 4 + webviz_subsurface/plugins/_test2_plug.py | 611 +++++++++++++++++++++++ 3 files changed, 617 insertions(+) create mode 100644 webviz_subsurface/plugins/_test2_plug.py diff --git a/setup.py b/setup.py index e88f4c63c..d0cc235fb 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,8 @@ "ReservoirSimulationTimeSeriesRegional = " + "webviz_subsurface.plugins:ReservoirSimulationTimeSeriesRegional", "RftPlotter = webviz_subsurface.plugins:RftPlotter", + "ExamplePlugin = webviz_subsurface.plugins:ExamplePlugin", + "ExamplePlugin2 = webviz_subsurface.plugins:ExamplePlugin2" ] }, install_requires=[ diff --git a/webviz_subsurface/plugins/__init__.py b/webviz_subsurface/plugins/__init__.py index e5ca24710..e83ecd1c7 100644 --- a/webviz_subsurface/plugins/__init__.py +++ b/webviz_subsurface/plugins/__init__.py @@ -49,6 +49,8 @@ ReservoirSimulationTimeSeriesRegional, ) from ._rft_plotter.rft_plotter import RftPlotter +from ._test_plug import ExamplePlugin +from ._test2_plug import ExamplePlugin2 __all__ = [ @@ -72,4 +74,6 @@ "RelativePermeability", "ReservoirSimulationTimeSeriesRegional", "RftPlotter", + "ExamplePlugin", + "ExamplePlugin2" ] diff --git a/webviz_subsurface/plugins/_test2_plug.py b/webviz_subsurface/plugins/_test2_plug.py new file mode 100644 index 000000000..7be7de478 --- /dev/null +++ b/webviz_subsurface/plugins/_test2_plug.py @@ -0,0 +1,611 @@ +from uuid import uuid4 +from pathlib import Path + +import numpy as np +import pandas as pd +from plotly.subplots import make_subplots +from dash.exceptions import PreventUpdate +from dash.dependencies import Input, Output +import dash_html_components as html +import dash_core_components as dcc +import webviz_core_components as wcc +from webviz_config.webviz_store import webvizstore +from webviz_config.common_cache import CACHE +from webviz_config import WebvizPluginABC +from webviz_config.utils import calculate_slider_step +import statsmodels.formula.api as smf +from sklearn.preprocessing import PolynomialFeatures + +from .._datainput.fmu_input import load_parameters, load_csv + + +class ExamplePlugin2(WebvizPluginABC): + + # pylint:disable=too-many-arguments + # plug-in tar in enten en csv fil eller en ensemble og div filter + def __init__( + self, + app, + parameter_csv: Path = None, + response_csv: Path = None, + ensembles: list = None, + response_file: str = None, + response_filters: dict = None, + response_ignore: list = None, + response_include: list = None, + parameter_filters: dict = None, + parameter_ignore: list = None, + parameter_include: list = None, + aggregation: str = "sum", + ): + + super().__init__() + self.parameter_csv = parameter_csv if parameter_csv else None + self.response_csv = response_csv if response_csv else None + self.response_file = response_file if response_file else None + self.response_filters = response_filters if response_filters else {} + self.response_ignore = response_ignore if response_ignore else None + + self.aggregation = aggregation + + if response_ignore and response_include: + raise ValueError( + 'Incorrent argument. either provide "response_include", ' + '"response_ignore" or neither' + ) + if parameter_csv and response_csv: + if ensembles or response_file: + raise ValueError( + 'Incorrect arguments. Either provide "csv files" or ' + '"ensembles and response_file".' + ) + self.parameterdf = pd.read_csv(self.parameter_csv) + self.responsedf = pd.read_csv(self.response_csv) +# her lager vi parameter og response DataFrames + elif ensembles and response_file: + self.ens_paths = { + ens: app.webviz_settings[ + "shared_settings"]["scratch_ensembles"][ens] + for ens in ensembles + } + self.parameterdf = load_parameters( + ensemble_paths=self.ens_paths, ensemble_set_name="EnsembleSet" + ) + self.responsedf = load_csv( + ensemble_paths=self.ens_paths, + csv_file=response_file, + ensemble_set_name="EnsembleSet", + ) + else: + raise ValueError( + """Incorrect arguments. + Either provide "csv files" or "ensembles and response_file".""" + ) + + self.check_runs() + self.check_response_filters() + if response_ignore: + self.responsedf.drop( + response_ignore, + errors="ignore", + axis=1, + inplace=True) + + if response_include: + self.responsedf.drop( + self.responsedf.columns.difference( + [ + "REAL", + "ENSEMBLE", + *response_include, + *list(response_filters.keys()), + ] + ), + errors="ignore", + axis=1, + inplace=True, + ) + + self.plotly_theme = app.webviz_settings["theme"].plotly_theme + self.uid = uuid4() + self.set_callbacks(app) + + def ids(self, element): + """Generate unique id for dom element""" + return f"{element}-id-{self.uid}" + + @property + def responses(self): + """Returns valid responses. Filters out non numerical columns, + and filterable columns""" + responses = list( + self.responsedf.drop(["ENSEMBLE", "REAL"], axis=1) + .apply(pd.to_numeric, errors="coerce") + .dropna(how="all", axis="columns") + .columns + ) + return [p for p in responses if p not in self.response_filters.keys()] + + @property + def parameters(self): + """Returns numerical input parameters""" + parameters = list( + self.parameterdf.drop(["ENSEMBLE", "REAL"], axis=1) + .apply(pd.to_numeric, errors="coerce") + .dropna(how="all", axis="columns") + .columns + ) + return parameters + + @property + def ensembles(self): + """Returns list of ensembles""" + return list(self.parameterdf["ENSEMBLE"].unique()) + + def check_runs(self): + """Check that input parameters and response files have + the same number of runs""" + for col in ["ENSEMBLE", "REAL"]: + if sorted(list(self.parameterdf[col].unique())) != sorted( + list(self.responsedf[col].unique()) + ): + raise ValueError("Parameter and response\ + files have different runs") + + def check_response_filters(self): + """'Check that provided response filters are valid""" + if self.response_filters: + for col_name, col_type in self.response_filters.items(): + if col_name not in self.responsedf.columns: + raise ValueError(f"{col_name} is not in response file") + if col_type not in ["single", "multi", "range"]: + raise ValueError( + f"Filter type {col_type} for {col_name} is not valid." + ) + + @property + def filter_layout(self): + """Layout to display selectors for response filters""" + children = [] + for col_name, col_type in self.response_filters.items(): + domid = self.ids(f"filter-{col_name}") + values = list(self.responsedf[col_name].unique()) + if col_type == "multi": + selector = wcc.Select( + id=domid, + options=[{"label": val, "value": val} for val in values], + value=values, + multi=True, + size=min(20, len(values)), + ) + elif col_type == "single": + selector = dcc.Dropdown( + id=domid, + options=[{"label": val, "value": val} for val in values], + value=values[0], + multi=False, + clearable=False, + ) + elif col_type == "range": + selector = make_range_slider( + domid, + self.responsedf[col_name], + col_name) + else: + return children + children.append(html.Div( + children=[html.Label(col_name), + selector, ])) + + return children + + @property + def control_layout(self): + """Layout to select e.g. iteration and response""" + return [ + html.Div( + [ + html.Label("Ensemble"), + dcc.Dropdown( + id=self.ids("ensemble"), + options=[ + {"label": ens, + "value": ens} for ens in self.ensembles + ], + clearable=False, + value=self.ensembles[0], + ), + ] + ), + html.Div( + [ + html.Label("Response"), + dcc.Dropdown( + id=self.ids("responses"), + options=[ + {"label": ens, + "value": ens} for ens in self.responses + ], + clearable=False, + value=self.responses[0], + ), + ] + ), + html.Div( + [ + html.Label("Interaction"), + dcc.RadioItems( + id=self.ids("interaction"), + options=[ + {"label": "on", "value": True}, + {"label": "off", "value": False} + ], + value=True + ) + ] + ), + ] + + @property + def correlation_input_callbacks(self): + """List of Inputs for correlation callback""" + callbacks = [ + Input(self.ids("ensemble"), "value"), + Input(self.ids("responses"), "value"), + ] + if self.response_filters: + for col_name in self.response_filters: + callbacks.append( + Input(self.ids(f"filter-{col_name}"), "value")) + return callbacks + + def make_response_filters(self, filters): + """Returns a list of active response filters""" + filteroptions = [] + if filters: + for i, (col_name, col_type) in enumerate(self.response_filters.items()): + filteroptions.append( + {"name": col_name, "type": col_type, "values": filters[i]} + ) + return filteroptions + + def forward_selected(data, response, maxvars=3): + # TODO find way to remove non-significant variables form entering model. + """Linear model designed by forward selection. + + Parameters: + ----------- + data : pandas DataFrame with all possible predictors and response + + response: string, name of response column in data + + Returns: + -------- + model: an "optimal" fitted statsmodels linear model + with an intercept + selected by forward selection + evaluated by adjusted R-squared + """ + remaining = set(data.columns) + remaining.remove(response) + selected = [] + + current_score, best_new_score = 0.0, 0.0 + while remaining and current_score == best_new_score and len(selected) < maxvars: + scores_with_candidates = [] + for candidate in remaining: + formula = "{} ~ {} + 1".format(response, + ' + '.join(selected + [candidate])) + score = smf.ols(formula, data).fit().rsquared_adj + scores_with_candidates.append((score, candidate)) + scores_with_candidates.sort() + best_new_score, best_candidate = scores_with_candidates.pop() + if current_score < best_new_score: + remaining.remove(best_candidate) + selected.append(best_candidate) + current_score = best_new_score + formula = "{} ~ {} + 1".format(response, + ' + '.join(selected)) + model = smf.ols(formula, data).fit() + return model + + @property + def model_input_callbacks(self): + hollabacks = [ + # Input(self.ids("initial-parameter"), "data"), + Input(self.ids("ensemble"), "value"), + Input(self.ids("responses"),"value"), + Input(self.ids("interaction"), "value") + ] + if self.response_filters: + for col_name in self.response_filters: + hollabacks.append(Input(self.ids(f"filter-{col_name}"), "value")) + return hollabacks + + + def set_callbacks(self, app): + @app.callback( + [ + Output(self.ids("p-values-graph"), "figure") + ], + self.model_input_callbacks, + ) + def update_pvalue_plot(ensemble, response, interaction, *filters): + filteroptions = self.make_response_filters(filters) + responsedf = filter_and_sum_responses( + self.responsedf, + ensemble, + response, + filteroptions=filteroptions, + aggregation=self.aggregation, + ) + parameter_filters=[ + 'RMSGLOBPARAMS:FWL', + 'MULTFLT:MULTFLT_F1', + 'MULTFLT:MULTFLT_F2', + 'MULTFLT:MULTFLT_F3', + 'MULTFLT:MULTFLT_F4', + 'MULTFLT:MULTFLT_F5', + 'MULTZ:MULTZ_MIDREEK', + 'INTERPOLATE_RELPERM:INTERPOLATE_GO', + 'INTERPOLATE_RELPERM:INTERPOLATE_WO', + 'LOG10_MULTFLT:MULTFLT_F1', + 'LOG10_MULTFLT:MULTFLT_F2', + 'LOG10_MULTFLT:MULTFLT_F3', + 'LOG10_MULTFLT:MULTFLT_F4', + 'LOG10_MULTFLT:MULTFLT_F5', + 'LOG10_MULTZ:MULTZ_MIDREEK', + "RMSGLOBPARAMS:COHIBA_MODEL_MODE", + "COHIBA_MODEL_MODE"] + parameterdf = self.parameterdf.loc[self.parameterdf["ENSEMBLE"] == ensemble] + param_df = parameterdf.drop(columns=parameter_filters) + df = pd.merge(responsedf, param_df, on=["REAL"]).drop(columns=["REAL", "ENSEMBLE"]) + model = gen_model(df, response, 9, interaction) + return make_p_values_plot(model) + + + + + + @property + def layout(self): + """Main layout""" + return wcc.FlexBox( + id=self.ids("layout"), + children=[ + html.Div( + style={'flex': 2}, + children=wcc.Graph( + id=self.ids('p-values-graph'), + figure={ + "data": [{"type": "bar", "x": [1, 2, 3],"y": [1, 3, 2]}], + "layout": {"title": {"text": "A Figure Specified By Python Dictionary"}} + } + ) + ), + html.Div( + style={"flex": 1}, + children=self.control_layout + self.filter_layout + if self.response_filters + else [], + ), + ], + ) + +def make_p_values_plot(model): + """ Sorting the dictionary in ascending order and making lists for parameters and p-values """ + p_sorted = model.pvalues.sort_values() + parameters = p_sorted.index + values = p_sorted.values + + """ Making an array for the corresponding colors """ + + colors = ["#FF1243" if val<0.05 else "slate-gray" for val in values] + + dict_fig = dict( + {"data": [ + { + "type": "bar", + "x": parameters, + "y": values, + "marker": {"color": colors} + }], + }) + return [dict_fig] +""" +@CACHE.memoize(timeout=CACHE.TIMEOUT) +def generate_model(ensemble, response, interaction, *filters): + + filteroptions = self.make_response_filters(filters) + responsedf = filter_and_sum_responses( + self.responsedf, + ensemble, + response, + filteroptions=filteroptions, + aggregation=self.aggregation, + ) + parameterdf = self.parameterdf.loc[ + self.parameterdf["ENSEMBLE"] == ensemble] + df = pd.merge(responsedf, parameterdf, on=["REAL"]) + return model(df, response, interaction) +""" +@CACHE.memoize(timeout=CACHE.TIMEOUT) +def filter_and_sum_responses( + dframe, ensemble, response, filteroptions=None, aggregation="sum" +): + """Cached wrapper for _filter_and_sum_responses""" + return _filter_and_sum_responses( + dframe=dframe, + ensemble=ensemble, + response=response, + filteroptions=filteroptions, + aggregation=aggregation, + ) + + +def _filter_and_sum_responses( + dframe, ensemble, response, filteroptions=None, aggregation="sum", +): + """Filter response dataframe for the given ensemble + and optional filter columns. Returns dataframe grouped and + aggregated per realization.""" + + df = dframe.copy() + df = df.loc[df["ENSEMBLE"] == ensemble] + if filteroptions: + for opt in filteroptions: + if opt["type"] == "multi" or opt["type"] == "single": + if isinstance(opt["values"], list): + df = df.loc[df[opt["name"]].isin(opt["values"])] + else: + df = df.loc[df[opt["name"]] == opt["values"]] + + elif opt["type"] == "range": + df = df.loc[ + (df[opt["name"]] >= np.min(opt["values"])) + & (df[opt["name"]] <= np.max(opt["values"])) + ] + if aggregation == "sum": + return df.groupby("REAL").sum().reset_index()[["REAL", response]] + if aggregation == "mean": + return df.groupby("REAL").mean().reset_index()[["REAL", response]] + raise ValueError( + f"Aggregation of response file specified as '{aggregation}'' is invalid. " + ) + + +def gen_model( + df: pd.DataFrame, + response: str, + max_vars: int=9, + interaction: bool=False): + + if interaction: + df = gen_interaction_df(df, response) + return forward_selected_interaction(df, response, maxvars=max_vars) + else: + return forward_selected(df, response, maxvars=max_vars) + +def gen_interaction_df( + df: pd.DataFrame, + response: str, + degree: int=2, + inter_only: bool=False, + bias: bool=False): + + x_interaction = PolynomialFeatures( + degree=2, + interaction_only=inter_only, + include_bias=False).fit_transform(df.drop(columns=response)) + interaction_df = pd.DataFrame( + x_interaction, + columns=gen_column_names( + df.drop(columns=response), + inter_only)) + return interaction_df.join(df[response]) + +def forward_selected_interaction(data, response, maxvars=9): + """Linear model designed by forward selection. + + Parameters: + ----------- + data : pandas DataFrame with all possible predictors and response + + response: string, name of response column in data + + Returns: + -------- + model: an "optimal" fitted statsmodels linear model + with an intercept + selected by forward selection + evaluated by adjusted R-squared + """ + remaining = set(data.columns) + remaining.remove(response) + selected = [] + current_score, best_new_score = 0.0, 0.0 + while remaining and current_score == best_new_score and len(selected) < maxvars: + scores_with_candidates = [] + for candidate in remaining: + formula = "{} ~ {} + 1".format(response, + ' + '.join(selected + [candidate])) + score = smf.ols(formula, data).fit().rsquared_adj + scores_with_candidates.append((score, candidate)) + scores_with_candidates.sort() + best_new_score, best_candidate = scores_with_candidates.pop() + if current_score < best_new_score: + candidate_split = best_candidate.split(sep=":") + if len(candidate_split) == 2: + if candidate_split[0] not in selected and candidate_split[0] in remaining: + remaining.remove(candidate_split[0]) + selected.append(candidate_split[0]) + maxvars += 1 + if candidate_split[1] not in selected and candidate_split[1] in remaining: + remaining.remove(candidate_split[1]) + selected.append(candidate_split[1]) + maxvars += 1 + remaining.remove(best_candidate) + selected.append(best_candidate) + current_score = best_new_score + formula = "{} ~ {} + 1".format(response, + ' + '.join(selected)) + model = smf.ols(formula, data).fit() + return model + +def forward_selected(data, response, maxvars=9): + # TODO find way to remove non-significant variables form entering model. + """Linear model designed by forward selection. + + Parameters: + ----------- + data : pandas DataFrame with all possible predictors and response + + response: string, name of response column in data + + Returns: + -------- + model: an "optimal" fitted statsmodels linear model + with an intercept + selected by forward selection + evaluated by adjusted R-squared + """ + remaining = set(data.columns) + remaining.remove(response) + selected = [] + + current_score, best_new_score = 0.0, 0.0 + while remaining and current_score == best_new_score and len(selected) < maxvars: + scores_with_candidates = [] + for candidate in remaining: + formula = "{} ~ {} + 1".format(response, + ' + '.join(selected + [candidate])) + score = smf.ols(formula, data).fit().rsquared_adj + scores_with_candidates.append((score, candidate)) + scores_with_candidates.sort() + best_new_score, best_candidate = scores_with_candidates.pop() + if current_score < best_new_score: + remaining.remove(best_candidate) + selected.append(best_candidate) + current_score = best_new_score + formula = "{} ~ {} + 1".format(response, + ' + '.join(selected)) + model = smf.ols(formula, data).fit() + return model + +def gen_column_names(df, interaction_only): + output = list(df.columns) + if interaction_only: + for colname1 in df.columns: + for colname2 in df.columns: + if ( + (colname1 != colname2) and + (f"{colname1}:{colname2}" not in output) or + (f"{colname2}:{colname1}" not in output) + ): + output.append(f"{colname1}:{colname2}") + else: + for colname1 in df.columns: + for colname2 in df.columns: + if (f"{colname1}:{colname2}" not in output) and (f"{colname2}:{colname1}" not in output): + output.append(f"{colname1}:{colname2}") + return output From 0c7b1777846c9096144ed6574078befdf660408d Mon Sep 17 00:00:00 2001 From: "jostein.gj@gmail.com" Date: Thu, 2 Jul 2020 08:50:14 +0200 Subject: [PATCH 2/5] testing --- setup.py | 8 +- .../_datainput/inplace_volumes.py | 2 +- .../_private_plugins/tornado_plot.py | 226 +++++++----- .../plugins/._parameter_correlation.py.swp | Bin 0 -> 20480 bytes webviz_subsurface/plugins/__init__.py | 10 +- .../plugins/_multiple_regression_sara.py | 184 ++++++++++ .../plugins/_multiple_regression_sofie.py | 339 ++++++++++++++++++ webviz_subsurface/plugins/_p_values.py | 69 ++++ .../_parameter_response_correlation.py | 58 ++- webviz_subsurface/plugins/_test2_plug.py | 2 +- webviz_subsurface/plugins/_test_plug.py | 80 +++++ 11 files changed, 867 insertions(+), 111 deletions(-) create mode 100644 webviz_subsurface/plugins/._parameter_correlation.py.swp create mode 100644 webviz_subsurface/plugins/_multiple_regression_sara.py create mode 100644 webviz_subsurface/plugins/_multiple_regression_sofie.py create mode 100644 webviz_subsurface/plugins/_p_values.py create mode 100644 webviz_subsurface/plugins/_test_plug.py diff --git a/setup.py b/setup.py index d0cc235fb..755ef6f09 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,13 @@ + "webviz_subsurface.plugins:ReservoirSimulationTimeSeriesRegional", "RftPlotter = webviz_subsurface.plugins:RftPlotter", "ExamplePlugin = webviz_subsurface.plugins:ExamplePlugin", - "ExamplePlugin2 = webviz_subsurface.plugins:ExamplePlugin2" + "ExamplePlugin2 = webviz_subsurface.plugins:MultipleRegressionJostein", + "PValues = webviz_subsurface.plugins:PValues", + "MultipleRegressionSofie = webviz_subsurface.plugins:MultipleRegressionSofie", + "PlotCoefficientsSara = webviz_subsurface.plugins:PlotCoefficientsSara" + + + ] }, install_requires=[ diff --git a/webviz_subsurface/_datainput/inplace_volumes.py b/webviz_subsurface/_datainput/inplace_volumes.py index 496f7a034..bf549d6f7 100644 --- a/webviz_subsurface/_datainput/inplace_volumes.py +++ b/webviz_subsurface/_datainput/inplace_volumes.py @@ -28,7 +28,7 @@ def extract_volumes(ensemble_paths, volfolder, volfiles) -> pd.DataFrame: df["SOURCE"] = volname df["ENSEMBLE"] = ens_name ens_dfs.append(df) - except ValueError: + except KeyError: pass try: dfs.append(pd.concat(ens_dfs)) diff --git a/webviz_subsurface/_private_plugins/tornado_plot.py b/webviz_subsurface/_private_plugins/tornado_plot.py index 4804aa15f..2a925b44f 100644 --- a/webviz_subsurface/_private_plugins/tornado_plot.py +++ b/webviz_subsurface/_private_plugins/tornado_plot.py @@ -330,93 +330,7 @@ def tornado_plot( ) ]["VALUE"].mean() - # Group by sensitivity name/case and calculate average values for each case - arr = [] - for sens_name, sens_name_df in realizations.groupby(["SENSNAME"]): - # Excluding the reference case as well as any cases named `ref` - # `ref` is used as `SENSNAME`, typically for a single realization only, - # when no seed uncertainty is used - if sens_name == "ref": - continue - - # If `SENSTYPE` is scalar grab the mean for each `SENSCASE` - if sens_name_df["SENSTYPE"].all() == "scalar": - for sens_case, sens_case_df in sens_name_df.groupby(["SENSCASE"]): - values = data.loc[data["REAL"].isin(sens_case_df["REAL"])][ - "VALUE" - ].mean() - - arr.append( - { - "sensname": sens_name, - "senscase": sens_case, - "values": values, - "values_ref": scale_to_ref(values, ref_avg, scale), - "reals": list(map(int, sens_case_df["REAL"])), - } - ) - # If `SENSTYPE` is monte carlo get p10, p90 - elif sens_name_df["SENSTYPE"].all() == "mc": - # Get data for relevant realizations - case_df = data.loc[data["REAL"].isin(sens_name_df["REAL"])] - - # Calculate p90(low) and p10(high) - p90 = case_df["VALUE"].quantile(0.10) - p10 = case_df["VALUE"].quantile(0.90) - - # Extract list of realizations with values less then reference avg (low) - low_reals = list(map(int, case_df.loc[case_df["VALUE"] <= ref_avg]["REAL"])) - - # Extract list of realizations with values higher then reference avg (high) - high_reals = list(map(int, case_df.loc[case_df["VALUE"] > ref_avg]["REAL"])) - - arr.append( - { - "sensname": sens_name, - "senscase": "P90", - "values": p90, - "values_ref": scale_to_ref(p90, ref_avg, scale), - "reals": low_reals, - } - ) - arr.append( - { - "sensname": sens_name, - "senscase": "P10", - "values": p10, - "values_ref": scale_to_ref(p10, ref_avg, scale), - "reals": high_reals, - } - ) - else: - raise ValueError( - f"Sensitivities should be either 'scalar'or 'mc'. \ - Sensitivity: '{sens_name}' is neither." - ) - - # Group by sensitivity name and calculate low / high values - arr2 = [] - for sensname, sens_name_df in pd.DataFrame(arr).groupby(["sensname"]): - low = sens_name_df.loc[sens_name_df["values_ref"].idxmin()] - high = sens_name_df.loc[sens_name_df["values_ref"].idxmax()] - - arr2.append( - { - "low": calc_low_x(low["values_ref"], high["values_ref"]), - "low_base": calc_low_base(low["values_ref"], high["values_ref"]), - "low_label": low["senscase"], - "true_low": low["values"], - "low_reals": low["reals"], - "sensname": sensname, - "high": calc_high_x(low["values_ref"], high["values_ref"]), - "high_base": calc_high_base(low["values_ref"], high["values_ref"]), - "high_label": high["senscase"], - "true_high": high["values"], - "high_reals": high["reals"], - } - ) - - df = pd.DataFrame(arr2) + df = calc_tornado_df(data, realizations, ref_avg, scale) # Drops sensitivities smaller than reference if specified if cutbyref and df["sensname"].str.contains(reference).any(): @@ -432,8 +346,13 @@ def tornado_plot( for sensname, sens_name_df in df.groupby(["sensname"]) } - # If percentage, unit is % - unit_x = "%" if scale == "Percentage" else unit + # If percentage, unit is % and we turn off SI-prefix + if scale == "Percentage": + unit_x = "%" + locked_si_prefix_relative = 0 + else: + unit_x = unit + locked_si_prefix_relative = locked_si_prefix # Return tornado data as Plotly figure plot_data = [ dict( @@ -444,13 +363,15 @@ def tornado_plot( base=df["low_base"], customdata=df["low_reals"], hovertext=[ - f"{si_prefixed(x, number_format, unit_x, spaced, locked_si_prefix)}" + f"{si_prefixed(x, number_format, unit_x, spaced, locked_si_prefix_relative)}" f"
Case: {label}
True Value: " f"{si_prefixed(val, number_format, unit, spaced, locked_si_prefix)}" f"
Realizations: " f"{printable_int_list(reals)}" + if reals + else None for x, label, val, reals in zip( - df["low"], df["low_label"], df["true_low"], df["low_reals"] + df["low_tooltip"], df["low_label"], df["true_low"], df["low_reals"], ) ], hoverinfo="text", @@ -464,13 +385,18 @@ def tornado_plot( base=df["high_base"], customdata=df["high_reals"], hovertext=[ - f"{si_prefixed(x, number_format, unit_x, spaced, locked_si_prefix)}" + f"{si_prefixed(x, number_format, unit_x, spaced, locked_si_prefix_relative)}" f"
Case: {label}
True Value: " f"{si_prefixed(val, number_format, unit, spaced, locked_si_prefix)}" f"
Realizations: " f"{printable_int_list(reals)}" + if reals + else None for x, label, val, reals in zip( - df["high"], df["high_label"], df["true_high"], df["high_reals"] + df["high_tooltip"], + df["high_label"], + df["true_high"], + df["high_reals"], ) ], hoverinfo="text", @@ -528,6 +454,120 @@ def tornado_plot( ) +# pylint: disable=too-many-locals +def calc_tornado_df(data, realizations, ref_avg, scale): + # Group by sensitivity name/case and calculate average values for each case + arr = [] + + for sens_name, sens_name_df in realizations.groupby(["SENSNAME"]): + # Excluding the reference case as well as any cases named `ref` + # `ref` is used as `SENSNAME`, typically for a single realization only, + # when no seed uncertainty is used + if sens_name == "ref": + continue + + # If `SENSTYPE` is scalar grab the mean for each `SENSCASE` + if sens_name_df["SENSTYPE"].all() == "scalar": + for sens_case, sens_case_df in sens_name_df.groupby(["SENSCASE"]): + values = data.loc[data["REAL"].isin(sens_case_df["REAL"])][ + "VALUE" + ].mean() + + arr.append( + { + "sensname": sens_name, + "senscase": sens_case, + "values": values, + "values_ref": scale_to_ref(values, ref_avg, scale), + "reals": list(map(int, sens_case_df["REAL"])), + } + ) + # If `SENSTYPE` is monte carlo get p10, p90 + elif sens_name_df["SENSTYPE"].all() == "mc": + # Get data for relevant realizations + case_df = data.loc[data["REAL"].isin(sens_name_df["REAL"])] + + # Calculate p90(low) and p10(high) + p90 = case_df["VALUE"].quantile(0.10) + p10 = case_df["VALUE"].quantile(0.90) + + # Extract list of realizations with values less then reference avg (low) + low_reals = list(map(int, case_df.loc[case_df["VALUE"] <= ref_avg]["REAL"])) + + # Extract list of realizations with values higher then reference avg (high) + high_reals = list(map(int, case_df.loc[case_df["VALUE"] > ref_avg]["REAL"])) + + arr.append( + { + "sensname": sens_name, + "senscase": "P90", + "values": p90, + "values_ref": scale_to_ref(p90, ref_avg, scale), + "reals": low_reals, + } + ) + arr.append( + { + "sensname": sens_name, + "senscase": "P10", + "values": p10, + "values_ref": scale_to_ref(p10, ref_avg, scale), + "reals": high_reals, + } + ) + else: + raise ValueError( + f"Sensitivities should be either 'scalar'or 'mc'. \ + Sensitivity: '{sens_name}' is neither." + ) + + # Group by sensitivity name and calculate low / high values + arr2 = [] + for sensname, sens_name_df in pd.DataFrame(arr).groupby(["sensname"]): + low = sens_name_df.copy().loc[sens_name_df["values_ref"].idxmin()] + high = sens_name_df.copy().loc[sens_name_df["values_ref"].idxmax()] + if sens_name_df["senscase"].nunique() == 1: + # Single case sens, implies low == high, but testing just in case: + if low["values_ref"] != high["values_ref"]: + raise ValueError( + "For a single sensitivity case, low and high cases should be equal. Likely bug" + ) + if low["values_ref"] < 0: + # To avoid warnings for changing values of dataframe slices. + high = high.copy() + high["values_ref"] = 0 + high["reals"] = [] + high["senscase"] = None + high["values"] = ref_avg + else: + low = ( + low.copy() + ) # To avoid warnings for changing values of dataframe slices. + low["values_ref"] = 0 + low["reals"] = [] + low["senscase"] = None + low["values"] = ref_avg + + arr2.append( + { + "low": calc_low_x(low["values_ref"], high["values_ref"]), + "low_base": calc_low_base(low["values_ref"], high["values_ref"]), + "low_label": low["senscase"], + "low_tooltip": low["values_ref"], + "true_low": low["values"], + "low_reals": low["reals"], + "sensname": sensname, + "high": calc_high_x(low["values_ref"], high["values_ref"]), + "high_base": calc_high_base(low["values_ref"], high["values_ref"]), + "high_label": high["senscase"], + "high_tooltip": high["values_ref"], + "true_high": high["values"], + "high_reals": high["reals"], + } + ) + return pd.DataFrame(arr2) + + def calc_low_base(low, high): """ From the low and high value of a parameter, diff --git a/webviz_subsurface/plugins/._parameter_correlation.py.swp b/webviz_subsurface/plugins/._parameter_correlation.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..3ea8c555e3652ae931ca83b85904d5184793b2bb GIT binary patch literal 20480 zcmeI4eXJZ+6~G6OFR?9wSV$ysm`2@)@9ldnl+-6rDJ{?%N}JFRg|^DlX&!C5bYmT2imS5M571Q^sKuu61S)0 zcGR%zDod&Lq=a63#`WXTT4+ahOC>6*)q^Nfo}IXXUk&F6w_2u_8Ynejw3Eijh84Rm zT(?#~p=+%3&9l$iRVt>`K&gRJ1EmH^4U`%vHBf4x)Ih0$caR2>@MPmt-0z9mJ+H{l zC-j})m3_Cf^Qyl5SFP->vmK&gRJ z1EmH^4U`%vHBf4x)Ih0$QUj$1-hc*d$1r|}(_dN1#k~K|{r`X9;Qt+-fQR8uSb$wH z1TRwlXYh5n3*`ELbFs_$I@ki|!po%n1-=9dw!p_=0xp47@a%gG<6rQ7xCsuy$Kfnk z2`k`{Gmrzm1>c0v!Ui}8R=}U$Z5UsLufPp(JxswyI1~PQx?wyGPr}3S09*$<;5>LA ztbpfFGmPKE_uw|z59h#XaO70OcmDDq`!!EQK0kmeb93VsZCz#$lcGvJ9VSYj@B zxXwhZyhhb^;))g5?SwrPBlW-wB5P>a7<83WuU9umLFfbr{mQ@+&7|d3ud=6=SFtup zRc2PPIXypa4V!T?@2QDJs}cCg$U)`KG!tvww59{kv6cq3T5r0Z6DfaU@~GzWszTdw zTz^KYSs%_>OGVMOK%3p!qPPQEnkrtG-~JxgnNdkxb+X&8HKL$ZbKH7T=_*h8v1(0w zYGfUnGj)2N6c1=h>Ru*Uvg)2ki$*r;?!+hcM&7P&L0RJGBL zE8PZY*PQ6-?6d=p(a+hPG^|DCDcOd(G~zlVi@FEfuCruyq_!;UO69A_PL$d9-2-h! ze`)!gpk-!PQ%GMR>XbEw)H9rCqR&okx`9}fV0+%QUEd#9>@e)!OfCR-)JBerxX5oDI zQpX326gTZiIUO0TsY#Yy=(OIfb*dtf&)xB&bs0Ts~|*)j$BWY`Y(-SD_M9Rwat?2Ot<`b#?W0&{OsEqlk!&5TnqCwiQkotZzD z!ksoKx2NgWo84Ar9PjPZ!Co^7%vm>X+n&3is)k|vPG`v|U8nVJ`}5|EJF9#eb0afo z^fX#Eu4x7b<8BI)b+!SU?U>PE+J&_AQ6EnA$UUxEv&QT$FM6&v>Yg3P-MmfRiYt5c z7Udm!6Y|E(Hi^1Aqo6yIWx%}VaxPuMX(J+QN)~8#G~ao=hmKm%4OqnO?^(pnri(a| zW<}dQRl134VKDlZa~NG7;F>FoTC-XmZ3Znhy4=3(M)!zX=4%!`zAfC^{<(hGPDUH8 zHiMW?Y}_{8R)}$B7!;ok!_Z=8PdekT;m+h``?_O5Nvv|K75LbkdK2rFHnC~rrY)Bb zDx8~>B#1hN)3jKU(n}mWZdM%?D&J9l-BnoXwA!t5C&T9UcEZ`n5<7B73L{TJUpBs* zc5E`8<%Oh5nH&0xu#_njkqNA?7D>v>ggi!6%-_(-)BJV|-IC;onI^@!nwfOk&>G_Z zpNbC};G>HFpTB?q13vyQ;X(Kuc(4^d2(RGRKMPO758yTk;3}Aa4}bxO@$DaiFMtm< z*a=s|HaH(lct89{e0+Ea?tvPd4KL%{KL|I%9BhZHVIz#dDtHn9{!zFa?trg>!~zny z82*c2{}kK~x4~y%A8dm$SP3uT-~SEng}dNpNMIja0;}L@{QED$CfES$U@e>kC&CHv zBzpV_+zW?5^y))-lo}{CP-@^Et^rvdWlo4^&j-wvl9GO`wnN+_Rf$7YcN=cKmL{ib zt6~wo)So1(B~@5cy+vffyky9NSQpAFspv0wU43=ui?~dtMN1qsac$*1#X2i%g7vd* zYbjmxb=P}+eyuexF)AupvVT&I)NU*X)FU17_P z_!(U)BLA)*L%eDIp}%Qajn}4U(f~0x63tB8m9b&{XUHnXJgm^hH94Nri@M9D+tWVL zUsLVUcZE*VSzTw$(vlEf_7w8!lHD;d*_$hV7C$Xyx#ilP)w2Y)7FxE%EY^0VCrPVg zq>x4#y+#-3z-eUb^Pa$n4sv9fnfKpQr8Kc@W-=d&{8m%hNy`p1MaqEdI(zBGe9vlW z+4260A65yHcyeYb8CSAA9IQ|c4OMNTA)%67@#?Zz#2Pc4_lVbw;~K;|#OwznEn?Ry z(%S7bbX15!$G7>dWzy_-8;A7cMU38|$5Y=*Vj8^{WRF|Xk(|if%uDqmyowY7E?^~b zTZ+ecB2C@7b=&3JckLX)P?{XwCrtlDQo-!#bqg7mM5!s}&R@+Nl$+PFg zO^Hq1mPiq?Qn=f$Usw8W=qEe`Jf1d6YPh~~sybbojZL1kdYhlul4@@1y27zJt&)QZ z=R6My#OA!KJje8HKYd3L_q36B+z^((70e<{r!hhWjA)Th>gqc2>7X+01}~^mc^Z9v zk%lrwgJSAm$KZVhb3>nbH97RWz~d?QV3}7x%S|0xEmB8*SPP0^@3HGD z%r-`Tb$+&p4%pfrPL5SKtRFVl_E=9Tl+JML5V&i|C4JD{z3CDS^Neg*-zSllDsdA% zE6n2~dtFPc^Ez{~Az$-jkig4K(?)(xem-7ZzXjc)*9XVf9ddV`n!#>&hnx8SKj*#i zKHe{j|8Hjt)K~HIkHB+q819G9!#rF7FNuE-kHK%?*KjY~3^&0i;2c;DkK^Be8}5O- zVGrzvkHR>J|Nkhy{(W#av|$vgATfX!@bRCA--5jRUxWqlU^iR=&*A4k4G+L~L1F>7 zfDc!~hrodQ@b&M6JQnb2KD%%&Tn6XDEBN|{;Z`^R+u=O;1^)e=kiZn24A0}+{}~>I zd*D`R!fJSxxXNMp4tx?m3@5@fyxYGWZiE9c32R|BJVHNw19X36eNmp?4jRa9NB_C- z8m!!uQMBE;vdT>vMy`tfb7A~+7M$g#4B40TmTdznH)TZSri^TxQDM+!u0y#gqp(je z(@ME1LyUQ*jB-= np.min(opt["values"])) + & (df[opt["name"]] <= np.max(opt["values"])) + ] + if aggregation == "sum": + return df.groupby("REAL").sum().reset_index()[["REAL", response]] + if aggregation == "mean": + return df.groupby("REAL").mean().reset_index()[["REAL", response]] + raise ValueError( + f"Aggregation of response file specified as '{aggregation}'' is invalid. " + ) + +def theme_layout(theme, specific_layout): + layout = {} + layout.update(theme["layout"]) + layout.update(specific_layout) + return layout + + +@CACHE.memoize(timeout=CACHE.TIMEOUT) +@webvizstore +def read_csv(csv_file) -> pd.DataFrame: + return pd.read_csv(csv_file, index_col=False) diff --git a/webviz_subsurface/plugins/_p_values.py b/webviz_subsurface/plugins/_p_values.py new file mode 100644 index 000000000..db36e6591 --- /dev/null +++ b/webviz_subsurface/plugins/_p_values.py @@ -0,0 +1,69 @@ +import dash_html_components as html +import dash_core_components as dcc +import plotly.graph_objects as go +import webviz_core_components as wcc +from webviz_config import WebvizPluginABC + +""" Example output from the calculations of p-values in dictionary form """ +p_values = { + 'FWL' : 0.032, + 'INTERPOLATE_WO' : 0.867, + 'MULTIFLT_F3' : 0.231, + 'INTERPOLATE_GO' : 0.047, + 'MULTIFLT_F4' : 0.567 +} + +class PValues(WebvizPluginABC): + def __init__(self, app): + super().__init__() + + @property + def layout(self): + return wcc.FlexBox( + children=[ + html.Div( + style={'flex': 2}, + children=wcc.Graph( + id='p_values_plot', + figure=make_p_values_plot(self, p_values) + ) + ) + ]) + +def make_p_values_plot(self, p_values): + + """ Sorting the dictionary in ascending order and making lists for parameters and p-values """ + p_sorted = dict(sorted(p_values.items(), key=lambda x: x[1])) + parameters = list(p_sorted.keys()) + values = list(p_sorted.values()) + + """ Making the bar chart plot """ + fig = go.Figure([ + go.Bar( + x=parameters, + y=values, + marker_color=["#FF1243" if val<0.05 else "slategray" for val in values])]) + + fig.update_layout( + yaxis=dict(range=[0,1], title=f'p-values'), + xaxis=dict(title='Parameters'), + title='P-values for the key parameter combination', + autosize=False, + width=800, + height=600, + ) + + """ Adding a line at p = 0.05 """ + fig.add_shape( + type='line', + y0=0.05, + y1=0.05, + x0=-0.5, + x1=len(values)-0.5, + xref='x', + line=dict( + color='#222A2A', + width=2 + ) + ) + return fig \ No newline at end of file diff --git a/webviz_subsurface/plugins/_parameter_response_correlation.py b/webviz_subsurface/plugins/_parameter_response_correlation.py index 217af6c61..b8009eeb8 100644 --- a/webviz_subsurface/plugins/_parameter_response_correlation.py +++ b/webviz_subsurface/plugins/_parameter_response_correlation.py @@ -4,7 +4,6 @@ import numpy as np import pandas as pd from plotly.subplots import make_subplots -from dash.exceptions import PreventUpdate from dash.dependencies import Input, Output import dash_html_components as html import dash_core_components as dcc @@ -14,7 +13,7 @@ from webviz_config import WebvizPluginABC from webviz_config.utils import calculate_slider_step -from .._datainput.fmu_input import load_parameters, load_csv +from .._datainput.fmu_input import load_parameters, load_csv, load_smry class ParameterResponseCorrelation(WebvizPluginABC): @@ -36,14 +35,21 @@ class ParameterResponseCorrelation(WebvizPluginABC): * `parameter_csv`: Aggregated csvfile for input parameters with 'REAL' and 'ENSEMBLE' columns. * `response_csv`: Aggregated csvfile for response parameters with 'REAL' and 'ENSEMBLE' columns. -* `ensembles`: Which ensembles in `shared_settings` to visualize. +* `ensembles`: Which ensembles in `shared_settings` to visualize. If neither response_csv or + response_file is defined, the definition of ensembles implies that you want to + use simulation timeseries data directly from UNSMRY data. This also implies that + the date will be used as a response filter of type `single`. * `response_file`: Local (per realization) csv file for response parameters. * `response_filters`: Optional dictionary of responses (columns in csv file) that can be used as row filtering before aggregation. (See below for filter types). - * `response_ignore`: Response (columns in csv) to ignore (cannot use with response_include). * `response_include`: Response (columns in csv) to include (cannot use with response_ignore). -* `aggreation`: How to aggregate responses per realization. Either `sum` or `mean`. +* `column_keys`: Simulation vectors to use as responses read directly from UNSMRY-files in the + defined ensembles using fmu-ensemble (cannot use with response_file, + response_csv or parameters_csv). +* `sampling`: Sampling frequency if using fmu-ensemble to import simulation time series data. + (Only relevant if neither response_csv or response_file is defined). Default monthly +* `aggregation`: How to aggregate responses per realization. Either `sum` or `mean`. * `corr_method`: Correlation algorithm. Either `pearson` or `spearman`. The types of response_filters are: @@ -65,6 +71,8 @@ def __init__( response_filters: dict = None, response_ignore: list = None, response_include: list = None, + column_keys: list = None, + sampling: str = "monthly", aggregation: str = "sum", corr_method: str = "pearson", ): @@ -76,6 +84,8 @@ def __init__( self.response_file = response_file if response_file else None self.response_filters = response_filters if response_filters else {} self.response_ignore = response_ignore if response_ignore else None + self.column_keys = column_keys + self.time_index = sampling self.corr_method = corr_method self.aggregation = aggregation if response_ignore and response_include: @@ -92,7 +102,7 @@ def __init__( self.parameterdf = read_csv(self.parameter_csv) self.responsedf = read_csv(self.response_csv) - elif ensembles and response_file: + elif ensembles: self.ens_paths = { ens: app.webviz_settings["shared_settings"]["scratch_ensembles"][ens] for ens in ensembles @@ -100,11 +110,19 @@ def __init__( self.parameterdf = load_parameters( ensemble_paths=self.ens_paths, ensemble_set_name="EnsembleSet" ) - self.responsedf = load_csv( - ensemble_paths=self.ens_paths, - csv_file=response_file, - ensemble_set_name="EnsembleSet", - ) + if self.response_file: + self.responsedf = load_csv( + ensemble_paths=self.ens_paths, + csv_file=response_file, + ensemble_set_name="EnsembleSet", + ) + else: + self.responsedf = load_smry( + ensemble_paths=self.ens_paths, + column_keys=self.column_keys, + time_index=self.time_index, + ) + self.response_filters["DATE"] = "single" else: raise ValueError( 'Incorrect arguments. Either provide "csv files" or "ensembles and response_file".' @@ -412,7 +430,7 @@ def _update_distribution_graph( elif initial_parameter: parameter = initial_parameter else: - raise PreventUpdate + return {} filteroptions = self.make_response_filters(filters) responsedf = filter_and_sum_responses( self.responsedf, @@ -452,6 +470,17 @@ def add_webvizstore(self): "ensemble_set_name": "EnsembleSet", } ], + ) + if self.response_file + else ( + load_smry, + [ + { + "ensemble_paths": self.ens_paths, + "column_keys": self.column_keys, + "time_index": self.time_index, + } + ], ), ] @@ -485,7 +514,10 @@ def _filter_and_sum_responses( if isinstance(opt["values"], list): df = df.loc[df[opt["name"]].isin(opt["values"])] else: - df = df.loc[df[opt["name"]] == opt["values"]] + if opt["name"] == "DATE" and isinstance(opt["values"], str): + df = df.loc[df["DATE"].astype(str) == opt["values"]] + else: + df = df.loc[df[opt["name"]] == opt["values"]] elif opt["type"] == "range": df = df.loc[ diff --git a/webviz_subsurface/plugins/_test2_plug.py b/webviz_subsurface/plugins/_test2_plug.py index 7be7de478..b9973efe2 100644 --- a/webviz_subsurface/plugins/_test2_plug.py +++ b/webviz_subsurface/plugins/_test2_plug.py @@ -19,7 +19,7 @@ from .._datainput.fmu_input import load_parameters, load_csv -class ExamplePlugin2(WebvizPluginABC): +class MultipleRegressionJostein(WebvizPluginABC): # pylint:disable=too-many-arguments # plug-in tar in enten en csv fil eller en ensemble og div filter diff --git a/webviz_subsurface/plugins/_test_plug.py b/webviz_subsurface/plugins/_test_plug.py new file mode 100644 index 000000000..247b459c3 --- /dev/null +++ b/webviz_subsurface/plugins/_test_plug.py @@ -0,0 +1,80 @@ +from uuid import uuid4 + +import dash_html_components as html +from dash.dependencies import Input, Output +from webviz_config import WebvizPluginABC +from numpy.random import rand +from pathlib import Path + +class ExamplePlugin(WebvizPluginABC): + + def __init__(self, app): + super().__init__() + + @property + def layout(self): + return wcc.FlexBox( + children=[ + html.Div([ + html.H2('Multiple regression of parameters and responses') + ]), + html.Div( + style={'flex': 2}, + children=wcc.Graph( + id='p_values_plot', + figure=make_p_values_plot(self, p_values) + ) + ) + ]) + + def make_p_values_plot(self, p_values): + + """ Sorting the dictionary in ascending order and making lists for parameters and p-values """ + p_sorted = dict(sorted(p_values.items(), key=lambda x: x[1])) + parameters = list(p_sorted.keys()) + calc_p_values = list(p_sorted.values()) + + """ Making an array for the corresponding colors """ + col_values = [int(i*100) for i in calc_p_values] + colors = ['#FF1243']*len(parameters) # Red Equinor color + + for i, v in enumerate(col_values): + if v <= 5: + colors[i] = '#5AC864' # Green color + + """ Making the bar chart plot """ + fig = go.Figure([go.Bar(x=parameters, y=calc_p_values, marker_color=colors)]) + fig.update_layout( + yaxis=dict(range=[0,1], title=f'p-values'), + xaxis=dict(title='Parameters'), + title='P-values for the key parameter combination', + autosize=False, + width=800, + height=600, + ) + + """ Adding a line at p = 0.05 """ + fig.add_shape( + type='line', + y0=0.05, y1=0.05, x0=-0.5, x1=len(p_values.keys())-0.5, xref='x', + line=dict( + color='#222A2A', + width=2 + ) + ) + return fig + + +def load_data(parameter_path: Path = None, + timeseries_path: Path = None, + inplace_path: Path = None + ): + para_df = pd.read_parquet(parameter_path) + inpl_df = pd.read_parquet(inplace_path) + ts_df = pd.read_parquet(timeseries_path) + + ts_df.columns = [col.replace(":", "_") for col in ts_df.columns] + inpl_df.columns = [col.replace(":", "_") for col in inpl_df.columns] + para_df.columns = [col.replace(":", "_") for col in para_df.columns] + + return (para_df, inpl_df, ts_df) \ No newline at end of file From 072a91a976adb623c5c9849f8e5410c88c41144a Mon Sep 17 00:00:00 2001 From: "jostein.gj@gmail.com" Date: Thu, 2 Jul 2020 08:59:59 +0200 Subject: [PATCH 3/5] fixed typo --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a978fe7b9..64a8402de 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ + "webviz_subsurface.plugins:ReservoirSimulationTimeSeriesRegional", "RftPlotter = webviz_subsurface.plugins:RftPlotter", "ExamplePlugin = webviz_subsurface.plugins:ExamplePlugin", - "ExamplePlugin2 = webviz_subsurface.plugins:MultipleRegressionJostein", + "MultipleRegressionJostein = webviz_subsurface.plugins:MultipleRegressionJostein", "PValues = webviz_subsurface.plugins:PValues", "DataTablefromFit = webviz_subsurface.plugins:DataTablefromFit", "MultipleRegressionSofie = webviz_subsurface.plugins:MultipleRegressionSofie", From 22327236f6db50dbb1c469a9fd77f4d9b59575af Mon Sep 17 00:00:00 2001 From: "jostein.gj@gmail.com" Date: Thu, 2 Jul 2020 15:27:03 +0200 Subject: [PATCH 4/5] table looking good pvalue plot broken --- webviz_subsurface/plugins/__init__.py | 7 - webviz_subsurface/plugins/_test2_plug.py | 169 ++++++++++++++++------- 2 files changed, 121 insertions(+), 55 deletions(-) diff --git a/webviz_subsurface/plugins/__init__.py b/webviz_subsurface/plugins/__init__.py index f870c2ed5..5ba8b6283 100644 --- a/webviz_subsurface/plugins/__init__.py +++ b/webviz_subsurface/plugins/__init__.py @@ -53,16 +53,9 @@ from ._test2_plug import MultipleRegressionJostein from ._p_values import PValues from ._multiple_regression_sofie import MultipleRegressionSofie -<<<<<<< HEAD -<<<<<<< HEAD from ._multiple_regression_sara import PlotCoefficientsSara -======= ->>>>>>> 75c4b928e0c6ba699ef12a2f634b7e06adcf61ae -======= from ._multiple_regession_vegard import DataTablefromFit from ._multiple_regression_sara import PlotCoefficientsSara ->>>>>>> 7b86485ca4251de15f6823648d73334a125b8b04 - __all__ = [ "ParameterDistribution", diff --git a/webviz_subsurface/plugins/_test2_plug.py b/webviz_subsurface/plugins/_test2_plug.py index b9973efe2..52acd7087 100644 --- a/webviz_subsurface/plugins/_test2_plug.py +++ b/webviz_subsurface/plugins/_test2_plug.py @@ -15,6 +15,8 @@ from webviz_config.utils import calculate_slider_step import statsmodels.formula.api as smf from sklearn.preprocessing import PolynomialFeatures +from dash_table import DataTable +from dash_table.Format import Format from .._datainput.fmu_input import load_parameters, load_csv @@ -61,6 +63,7 @@ def __init__( ) self.parameterdf = pd.read_csv(self.parameter_csv) self.responsedf = pd.read_csv(self.response_csv) + # her lager vi parameter og response DataFrames elif ensembles and response_file: self.ens_paths = { @@ -135,6 +138,7 @@ def parameters(self): .dropna(how="all", axis="columns") .columns ) + parameters =[(param.replace(":","_") if ":" in param else param) for param in parameters] return parameters @property @@ -240,10 +244,42 @@ def control_layout(self): {"label": "on", "value": True}, {"label": "off", "value": False} ], - value=True + value=False ) ] ), + html.Div( + [ + html.Label("Force out"), + dcc.Dropdown( + id=self.ids("force out"), + options=[ + {"label": param, + "value": param} for param in self.parameters + ], + clearable=True, + multi=True, + value=["FWL", "INTERPOLATE_WO"], + + ) + ] + ), + html.Div( + [ + html.Label("number of variables"), + dcc.Input( + id=self.ids("nvars"), + type="number", + debounce=True, + placeholder="Max variables", + min=1, + max=len(self.parameterdf), + step=1, + value=9, + + ) + ] + ) ] @property @@ -315,22 +351,38 @@ def model_input_callbacks(self): # Input(self.ids("initial-parameter"), "data"), Input(self.ids("ensemble"), "value"), Input(self.ids("responses"),"value"), - Input(self.ids("interaction"), "value") + Input(self.ids("interaction"), "value"), + Input(self.ids("force out"), "value"), + Input(self.ids("nvars"), "value") ] if self.response_filters: for col_name in self.response_filters: hollabacks.append(Input(self.ids(f"filter-{col_name}"), "value")) return hollabacks + @property + def table_input_callbacks(self): + """List of Inputs for table callback""" + callbacks = [ + Input(self.ids("ensemble"), "value"), + Input(self.ids("responses"), "value"), + ] + if self.response_filters: + for col_name in self.response_filters: + callbacks.append(Input(self.ids(f"filter-{col_name}"), "value")) + return callbacks def set_callbacks(self, app): @app.callback( [ - Output(self.ids("p-values-graph"), "figure") + Output(self.ids("p-values-graph"), "figure"), + Output(self.ids("table"), "data"), + Output(self.ids("table"), "columns"), + Output(self.ids("table_title"), "children"), ], self.model_input_callbacks, ) - def update_pvalue_plot(ensemble, response, interaction, *filters): + def update_model_plot(ensemble, response, interaction, force_out, nvars, *filters): filteroptions = self.make_response_filters(filters) responsedf = filter_and_sum_responses( self.responsedf, @@ -339,58 +391,76 @@ def update_pvalue_plot(ensemble, response, interaction, *filters): filteroptions=filteroptions, aggregation=self.aggregation, ) - parameter_filters=[ - 'RMSGLOBPARAMS:FWL', - 'MULTFLT:MULTFLT_F1', - 'MULTFLT:MULTFLT_F2', - 'MULTFLT:MULTFLT_F3', - 'MULTFLT:MULTFLT_F4', - 'MULTFLT:MULTFLT_F5', - 'MULTZ:MULTZ_MIDREEK', - 'INTERPOLATE_RELPERM:INTERPOLATE_GO', - 'INTERPOLATE_RELPERM:INTERPOLATE_WO', - 'LOG10_MULTFLT:MULTFLT_F1', - 'LOG10_MULTFLT:MULTFLT_F2', - 'LOG10_MULTFLT:MULTFLT_F3', - 'LOG10_MULTFLT:MULTFLT_F4', - 'LOG10_MULTFLT:MULTFLT_F5', - 'LOG10_MULTZ:MULTZ_MIDREEK', - "RMSGLOBPARAMS:COHIBA_MODEL_MODE", - "COHIBA_MODEL_MODE"] - parameterdf = self.parameterdf.loc[self.parameterdf["ENSEMBLE"] == ensemble] - param_df = parameterdf.drop(columns=parameter_filters) - df = pd.merge(responsedf, param_df, on=["REAL"]).drop(columns=["REAL", "ENSEMBLE"]) - model = gen_model(df, response, 9, interaction) - return make_p_values_plot(model) - + paramdf = self.parameterdf + + paramdf.columns = [colname.replace(":","_") if ":" in colname else colname for colname in paramdf.columns] + paramdf = paramdf.loc[paramdf["ENSEMBLE"] == ensemble] + paramdf.drop(columns=force_out, inplace=True) + + df = pd.merge(responsedf, paramdf, on=["REAL"]).drop(columns=["REAL", "ENSEMBLE"]) + model = gen_model(df, response, nvars, interaction) + p_values_plot = make_p_values_plot(model) + + table = model.summary2().tables[1] + table.index.name = "Parameter" + table.reset_index(inplace=True) + columns = [{"name": i, "id": i, 'type': 'numeric', "format": Format(precision=4)} for i in table.columns] + data = list(table.to_dict("index").values()) + + return ( + p_values_plot, + data, + columns, + f"Multiple regression with {response} as response") - - @property def layout(self): """Main layout""" - return wcc.FlexBox( + return html.Div( id=self.ids("layout"), children=[ + wcc.FlexBox( + id=self.ids("layout p-values-graph"), + children=[ + html.Div( + style={'flex': 2}, + children=wcc.Graph( + id=self.ids('p-values-graph'), + figure={ + "data": [{"type": "bar", "x": [1, 2, 3],"y": [1, 3, 2]}], + "layout": {"title": {"text": "A Figure Specified By Python Dictionary"}} + } + ) + ), + html.Div( + style={"flex": 1}, + children=self.control_layout + self.filter_layout + if self.response_filters + else [], + ), + ] + ), + wcc.FlexBox( html.Div( - style={'flex': 2}, - children=wcc.Graph( - id=self.ids('p-values-graph'), - figure={ - "data": [{"type": "bar", "x": [1, 2, 3],"y": [1, 3, 2]}], - "layout": {"title": {"text": "A Figure Specified By Python Dictionary"}} - } - ) - ), - html.Div( - style={"flex": 1}, - children=self.control_layout + self.filter_layout - if self.response_filters - else [], - ), - ], - ) + id=self.ids("layout-table"), + style={"flex": 3}, + children=[ + html.Div( + id=self.ids("table_title"), + style={"textAlign": "center"}, + children="Ttitle", + ), + DataTable( + id=self.ids("table"), + sort_action="native", + filter_action="native", + page_action="native", + page_size=10, + ), + ], + ),)]) + def make_p_values_plot(model): """ Sorting the dictionary in ascending order and making lists for parameters and p-values """ @@ -486,6 +556,7 @@ def gen_model( else: return forward_selected(df, response, maxvars=max_vars) + def gen_interaction_df( df: pd.DataFrame, response: str, @@ -504,6 +575,7 @@ def gen_interaction_df( inter_only)) return interaction_df.join(df[response]) + def forward_selected_interaction(data, response, maxvars=9): """Linear model designed by forward selection. @@ -552,6 +624,7 @@ def forward_selected_interaction(data, response, maxvars=9): model = smf.ols(formula, data).fit() return model + def forward_selected(data, response, maxvars=9): # TODO find way to remove non-significant variables form entering model. """Linear model designed by forward selection. From f7fb791b9f25c62006446637715587662335af7d Mon Sep 17 00:00:00 2001 From: "jostein.gj@gmail.com" Date: Thu, 2 Jul 2020 15:33:36 +0200 Subject: [PATCH 5/5] p-value plot working, kinda, table removed --- webviz_subsurface/plugins/_test2_plug.py | 94 ++++++------------------ 1 file changed, 23 insertions(+), 71 deletions(-) diff --git a/webviz_subsurface/plugins/_test2_plug.py b/webviz_subsurface/plugins/_test2_plug.py index 52acd7087..79d81a2cf 100644 --- a/webviz_subsurface/plugins/_test2_plug.py +++ b/webviz_subsurface/plugins/_test2_plug.py @@ -15,8 +15,6 @@ from webviz_config.utils import calculate_slider_step import statsmodels.formula.api as smf from sklearn.preprocessing import PolynomialFeatures -from dash_table import DataTable -from dash_table.Format import Format from .._datainput.fmu_input import load_parameters, load_csv @@ -360,29 +358,15 @@ def model_input_callbacks(self): hollabacks.append(Input(self.ids(f"filter-{col_name}"), "value")) return hollabacks - @property - def table_input_callbacks(self): - """List of Inputs for table callback""" - callbacks = [ - Input(self.ids("ensemble"), "value"), - Input(self.ids("responses"), "value"), - ] - if self.response_filters: - for col_name in self.response_filters: - callbacks.append(Input(self.ids(f"filter-{col_name}"), "value")) - return callbacks def set_callbacks(self, app): @app.callback( [ - Output(self.ids("p-values-graph"), "figure"), - Output(self.ids("table"), "data"), - Output(self.ids("table"), "columns"), - Output(self.ids("table_title"), "children"), + Output(self.ids("p-values-graph"), "figure") ], self.model_input_callbacks, ) - def update_model_plot(ensemble, response, interaction, force_out, nvars, *filters): + def update_pvalue_plot(ensemble, response, interaction, force_out, nvars, *filters): filteroptions = self.make_response_filters(filters) responsedf = filter_and_sum_responses( self.responsedf, @@ -399,68 +383,36 @@ def update_model_plot(ensemble, response, interaction, force_out, nvars, *filter df = pd.merge(responsedf, paramdf, on=["REAL"]).drop(columns=["REAL", "ENSEMBLE"]) model = gen_model(df, response, nvars, interaction) - p_values_plot = make_p_values_plot(model) + return make_p_values_plot(model) - table = model.summary2().tables[1] - table.index.name = "Parameter" - table.reset_index(inplace=True) - columns = [{"name": i, "id": i, 'type': 'numeric', "format": Format(precision=4)} for i in table.columns] - data = list(table.to_dict("index").values()) - - return ( - p_values_plot, - data, - columns, - f"Multiple regression with {response} as response") + + @property def layout(self): """Main layout""" - return html.Div( + return wcc.FlexBox( id=self.ids("layout"), children=[ - wcc.FlexBox( - id=self.ids("layout p-values-graph"), - children=[ - html.Div( - style={'flex': 2}, - children=wcc.Graph( - id=self.ids('p-values-graph'), - figure={ - "data": [{"type": "bar", "x": [1, 2, 3],"y": [1, 3, 2]}], - "layout": {"title": {"text": "A Figure Specified By Python Dictionary"}} - } - ) - ), - html.Div( - style={"flex": 1}, - children=self.control_layout + self.filter_layout - if self.response_filters - else [], - ), - ] - ), - wcc.FlexBox( html.Div( - id=self.ids("layout-table"), - style={"flex": 3}, - children=[ - html.Div( - id=self.ids("table_title"), - style={"textAlign": "center"}, - children="Ttitle", - ), - DataTable( - id=self.ids("table"), - sort_action="native", - filter_action="native", - page_action="native", - page_size=10, - ), - ], - ),)]) - + style={'flex': 2}, + children=wcc.Graph( + id=self.ids('p-values-graph'), + figure={ + "data": [{"type": "bar", "x": [1, 2, 3],"y": [1, 3, 2]}], + "layout": {"title": {"text": "A Figure Specified By Python Dictionary"}} + } + ) + ), + html.Div( + style={"flex": 1}, + children=self.control_layout + self.filter_layout + if self.response_filters + else [], + ), + ], + ) def make_p_values_plot(model): """ Sorting the dictionary in ascending order and making lists for parameters and p-values """