feat(transformer): adds the "maths" transformer

jdreo · jdreo · commit 6d93484bf8ef · 2026-04-21T19:08:21.000+02:00
diff --git a/docs/sections/mapping_api.rst b/docs/sections/mapping_api.rst
@@ -571,6 +571,32 @@ Is equivalent to:
         raise exceptions.TransformerConfigError("Unknown value")
 
 
+maths
+~~~~~
+
+The *maths* transformer evaluates an arithmetic expression computed from the
+values of its configured columns.
+
+Its main configuration variable is ``operation``, a format string in which
+each ``{column}`` placeholder is replaced by the value of that column in the
+current row before the resulting expression is evaluated.
+
+This transformer returns a floating-point number which is the result of the
+arithmetic operation.
+
+For example:
+
+.. code:: yaml
+
+    - maths:
+        columns:
+            - x
+            - y
+            - z
+        operation: "{x} * {y} + sin({z})"
+        to_object: result
+        via_relation: has_result
+
+
+
 Case manipulation transformers
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/src/ontoweaver/transformer.py b/src/ontoweaver/transformer.py
@@ -1,15 +1,21 @@
 """ The module that stores all the default transformers and the register functions.
 """
+from __future__ import division
+
 import re
-import math
 import sys
+import math
 import json
 import inspect
 import logging
 import pathlib
+import operator
 import importlib
 from abc import abstractmethod
 
+from pyparsing import (Literal, CaselessLiteral, Word, Combine, Group, Optional,
+                       ZeroOrMore, Forward, nums, alphas, oneOf)
+
 import numpy as np
 import pandas as pd
 import pandera.pandas as pa
@@ -1240,6 +1246,7 @@ def __call__(self, row, i):
                     value, edge_type, node_type, reverse_edge = self.create(val, row)
                     yield value, edge_type, node_type, reverse_edge
 
+
 class split_replace(base.Transformer):
 
     def __init__(self,
@@ -1299,3 +1306,185 @@ def __call__(self, row, i):
                 for val in self.replace.value_maker(["replace_column"], pseudorow, i):
                     value, edge_type, node_type, reverse_edge = self.create(val, row)
                     yield value, edge_type, node_type, reverse_edge
+
+
+class NumericStringParser(object):
+    """
+    Parse and evaluate an arithmetic expression given as a string.
+
+    Most of this code comes from the fourFn.py pyparsing example.
+
+    Supported syntax: floating-point literals, the constants ``PI`` and ``E``
+    (case-insensitive), the binary operators ``+ - * / ^`` (``^`` being a
+    right-associative exponentiation), unary minus, parentheses, and the
+    single-argument functions registered in ``self.fn``.
+
+    The parser keeps the postfix stack of the expression being parsed in
+    ``self.exprStack``, so a single instance must not evaluate two
+    expressions at the same time.
+    """
+
+    def pushFirst(self, strg, loc, toks):
+        """Parse action: push the first matched token on the postfix stack."""
+        self.exprStack.append(toks[0])
+
+    def pushUMinus(self, strg, loc, toks):
+        """Parse action: push a marker when the atom starts with a unary minus."""
+        if toks and toks[0] == '-':
+            self.exprStack.append('unary -')
+
+    def __init__(self):
+        """
+        Build the grammar and the operator/function lookup tables.
+
+        expop   :: '^'
+        multop  :: '*' | '/'
+        addop   :: '+' | '-'
+        integer :: ['+' | '-'] '0'..'9'+
+        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
+        factor  :: atom [ expop factor ]*
+        term    :: factor [ multop factor ]*
+        expr    :: term [ addop term ]*
+        """
+        point = Literal(".")
+        e = CaselessLiteral("E")
+        fnumber = Combine(Word("+-" + nums, nums) +
+                          Optional(point + Optional(Word(nums))) +
+                          Optional(e + Word("+-" + nums, nums)))
+        ident = Word(alphas, alphas + nums + "_$")
+        plus = Literal("+")
+        minus = Literal("-")
+        mult = Literal("*")
+        div = Literal("/")
+        lpar = Literal("(").suppress()
+        rpar = Literal(")").suppress()
+        addop = plus | minus
+        multop = mult | div
+        expop = Literal("^")
+        pi = CaselessLiteral("PI")
+        expr = Forward()
+        atom = ((Optional(oneOf("- +")) +
+                 (ident + lpar + expr + rpar | pi | e | fnumber).setParseAction(self.pushFirst))
+                | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
+                ).setParseAction(self.pushUMinus)
+        # By defining exponentiation as "atom [ ^ factor ]..." instead of
+        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
+        # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
+        factor = Forward()
+        factor << atom + \
+            ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
+        term = factor + \
+            ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
+        expr << term + \
+            ZeroOrMore((addop + term).setParseAction(self.pushFirst))
+        self.bnf = expr
+
+        epsilon = 1e-12
+
+        def sgn(a):
+            # The original expression relied on `cmp`, which does not exist
+            # in Python 3; `(a > 0) - (a < 0)` yields the same -1/0/1 result.
+            if abs(a) > epsilon:
+                return (a > 0) - (a < 0)
+            return 0
+
+        # Map operator symbols to the corresponding arithmetic operations.
+        self.opn = {"+": operator.add,
+                    "-": operator.sub,
+                    "*": operator.mul,
+                    "/": operator.truediv,
+                    "^": operator.pow}
+        # Map function names to their single-argument implementations.
+        self.fn = {"sin": math.sin,
+                   "cos": math.cos,
+                   "tan": math.tan,
+                   "exp": math.exp,
+                   "abs": abs,
+                   "trunc": int,
+                   "round": round,
+                   "sgn": sgn}
+
+    def evaluateStack(self, s):
+        """
+        Recursively evaluate the postfix stack `s`, consuming it from the end.
+
+        Args:
+            s: list of tokens in postfix order, as filled by the parse actions.
+
+        Returns:
+            The numeric value of the expression.
+
+        Raises:
+            ValueError: if a token is an identifier that is neither a known
+                constant nor a function registered in `self.fn`.
+        """
+        op = s.pop()
+        if op == 'unary -':
+            return -self.evaluateStack(s)
+        # Exact lookup: a substring test against "+-*/^" would also accept
+        # multi-character strings such as "+-".
+        if op in self.opn:
+            # Operands were pushed left-to-right, hence popped right-to-left.
+            op2 = self.evaluateStack(s)
+            op1 = self.evaluateStack(s)
+            return self.opn[op](op1, op2)
+        if op == "PI":
+            return math.pi  # 3.1415926535
+        if op == "E":
+            return math.e  # 2.718281828
+        if op in self.fn:
+            return self.fn[op](self.evaluateStack(s))
+        if op[0].isalpha():
+            # Silently evaluating an unknown function to 0 would hide typos
+            # in the configured operation and produce wrong data.
+            raise ValueError(f"Unknown function or identifier: '{op}'")
+        return float(op)
+
+    def eval(self, num_string, parseAll=True):
+        """
+        Parse `num_string` and return the value of the expression.
+
+        Args:
+            num_string: the arithmetic expression to evaluate.
+            parseAll: forwarded to pyparsing; if True, the whole string must
+                match the grammar.
+
+        Returns:
+            The numeric value of the expression.
+        """
+        self.exprStack = []
+        # Parsing fills `self.exprStack` through the parse actions; the
+        # returned parse results themselves are not needed.
+        self.bnf.parseString(num_string, parseAll)
+        # Evaluate a copy, so that `self.exprStack` stays inspectable.
+        return self.evaluateStack(self.exprStack[:])
+
+
+class maths(base.Transformer):
+    """Transformer computing an arithmetic operation over the values of a row."""
+
+    class ValueMaker(make_value.ValueMaker):
+        """Value maker evaluating the configured `operation` for each row."""
+
+        def __init__(self,
+            raise_errors: bool = True,
+            operation: str = None,
+            nsp = None
+        ):
+            """
+            Args:
+                raise_errors: forwarded to the parent ValueMaker.
+                operation: a format string whose placeholders are column names.
+                nsp: the parser used to evaluate the formatted operation.
+                    If None, a new NumericStringParser is created.
+            """
+            self.operation = operation
+            # The parser holds per-evaluation state, so each value maker gets
+            # its own instance instead of sharing one created at class
+            # definition time through a default argument.
+            self.nsp = NumericStringParser() if nsp is None else nsp
+            super().__init__(raise_errors)
+
+        def __call__(self, columns, row, i):
+            """
+            Format the operation with the values of `row` and yield its result.
+
+            Args:
+                columns: unused here, placeholders are looked up directly in `row`.
+                row: the mapping from column names to values.
+                i: the index of the row, used for error reporting.
+
+            Yields:
+                The result of the operation; nothing if an error was reported
+                and `self.error` did not raise.
+            """
+            # Bound before the `try`, so that the error message below can
+            # always show something, even if formatting itself fails.
+            eq = self.operation
+            try:
+                eq = self.operation.format_map(row)
+                result = self.nsp.eval(eq)
+            except KeyError as err:
+                self.error(f"{err}, available keys:\n{row}",
+                    exception = exceptions.TransformerConfigError,
+                    index = i,
+                    section = "maths"
+                )
+            except Exception as err:
+                self.error(f"{err}, while evaluating operation: {eq}",
+                    exception = exceptions.TransformerDataError,
+                    index = i,
+                    section = "maths"
+                )
+            else:
+                # Only reached on success: `result` is never yielded unbound
+                # when `self.error` returns instead of raising.
+                yield result
+
+    def __init__(self,
+            properties_of,
+            label_maker = None,
+            branching_properties = None,
+            columns=None,
+            output_validator: validate.OutputValidator = None,
+            multi_type_dict = None,
+            raise_errors = True,
+            operation = None,
+            **kwargs
+        ):
+        """
+        Initialize the maths transformer.
+
+        Args:
+            properties_of: Properties of the node.
+            label_maker: the LabelMaker object used for handling the creation of the output of the transformer. Default is None.
+            branching_properties: in case of branching on cell values, the dictionary holding the properties for each branch.
+            columns: The columns to be processed.
+            output_validator: the OutputValidator object used for validating transformer output.
+            multi_type_dict: the dictionary holding regex patterns for node and edge type branching based on cell values.
+            raise_errors: if True, the caller is asking for raising exceptions when an error occurs
+            operation: A format string assembling the column names in an arithmetic operation.
+            **kwargs: forwarded to the parent transformer.
+        """
+
+        self.value_maker = self.ValueMaker(
+            raise_errors=raise_errors,
+            operation=operation
+        )
+
+        super().__init__(properties_of,
+            self.value_maker,
+            label_maker,
+            branching_properties,
+            columns,
+            output_validator,
+            multi_type_dict,
+            raise_errors=raise_errors,
+            **kwargs
+        )
+
+        # Checked after the parent initialization, as in the original code
+        # (presumably because `self.error` relies on it, to be confirmed).
+        if not operation:  # Neither empty string nor None.
+            self.error(f"The `operation` parameter of the `{type(self).__name__}` transformer cannot be an empty string.")
+
+