|
1 | 1 | """ The module that stores all the default transformers and the register functions. |
2 | 2 | """ |
| 3 | +from __future__ import division |
| 4 | + |
3 | 5 | import re |
4 | | -import math |
5 | 6 | import sys |
| 7 | +import math |
6 | 8 | import json |
7 | 9 | import inspect |
8 | 10 | import logging |
9 | 11 | import pathlib |
| 12 | +import operator |
10 | 13 | import importlib |
11 | 14 | from abc import abstractmethod |
12 | 15 |
|
| 16 | +from pyparsing import (Literal, CaselessLiteral, Word, Combine, Group, Optional, |
| 17 | + ZeroOrMore, Forward, nums, alphas, oneOf) |
| 18 | + |
13 | 19 | import numpy as np |
14 | 20 | import pandas as pd |
15 | 21 | import pandera.pandas as pa |
@@ -1240,6 +1246,7 @@ def __call__(self, row, i): |
1240 | 1246 | value, edge_type, node_type, reverse_edge = self.create(val, row) |
1241 | 1247 | yield value, edge_type, node_type, reverse_edge |
1242 | 1248 |
|
| 1249 | + |
1243 | 1250 | class split_replace(base.Transformer): |
1244 | 1251 |
|
1245 | 1252 | def __init__(self, |
@@ -1299,3 +1306,185 @@ def __call__(self, row, i): |
1299 | 1306 | for val in self.replace.value_maker(["replace_column"], pseudorow, i): |
1300 | 1307 | value, edge_type, node_type, reverse_edge = self.create(val, row) |
1301 | 1308 | yield value, edge_type, node_type, reverse_edge |
| 1309 | + |
| 1310 | + |
class NumericStringParser(object):
    """Parse and evaluate an arithmetic expression given as a string.

    Most of this code comes from the fourFn.py pyparsing example.

    While parsing, the grammar's parse actions push operands and operators
    onto ``self.exprStack`` in postfix order; ``evaluateStack`` then folds
    that stack into a single number.
    """

    # Magnitudes at or below this threshold are treated as zero by `sgn`.
    _EPSILON = 1e-12

    @staticmethod
    def _sgn(a):
        """Return -1, 0 or 1 according to the sign of `a`.

        Replaces the Python 2 idiom based on `cmp`, which no longer exists
        in Python 3.
        """
        if abs(a) <= NumericStringParser._EPSILON:
            return 0
        return (a > 0) - (a < 0)

    def pushFirst(self, strg, loc, toks):
        """Parse action: push the first matched token onto the stack."""
        self.exprStack.append(toks[0])

    def pushUMinus(self, strg, loc, toks):
        """Parse action: push a unary-minus marker if the atom starts with '-'."""
        if toks and toks[0] == '-':
            self.exprStack.append('unary -')

    def __init__(self):
        """Build the grammar and the operator/function lookup tables.

        expop   :: '^'
        multop  :: '*' | '/'
        addop   :: '+' | '-'
        integer :: ['+' | '-'] '0'..'9'+
        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
        factor  :: atom [ expop factor ]*
        term    :: factor [ multop factor ]*
        expr    :: term [ addop term ]*
        """
        point = Literal(".")
        e = CaselessLiteral("E")
        fnumber = Combine(Word("+-" + nums, nums) +
                          Optional(point + Optional(Word(nums))) +
                          Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")
        expr = Forward()
        atom = ((Optional(oneOf("- +")) +
                 (ident + lpar + expr + rpar | pi | e | fnumber).setParseAction(self.pushFirst))
                | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
                ).setParseAction(self.pushUMinus)
        # By defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
        # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor <<= atom + \
            ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
        term = factor + \
            ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
        expr <<= term + \
            ZeroOrMore((addop + term).setParseAction(self.pushFirst))
        self.bnf = expr
        # Map operator symbols to the corresponding arithmetic operations.
        self.opn = {"+": operator.add,
                    "-": operator.sub,
                    "*": operator.mul,
                    "/": operator.truediv,
                    "^": operator.pow}
        # Map function names to their single-argument implementations.
        self.fn = {"sin": math.sin,
                   "cos": math.cos,
                   "tan": math.tan,
                   "exp": math.exp,
                   "abs": abs,
                   "trunc": int,
                   "round": round,
                   "sgn": self._sgn}

    def evaluateStack(self, s):
        """Recursively evaluate the postfix stack `s` and return a number.

        The stack is consumed (popped) in place.

        Raises:
            ValueError: if a token is an identifier that is not a known
                function, or is not a valid number.
            IndexError: if the stack runs out of operands.
        """
        op = s.pop()
        if op == 'unary -':
            return -self.evaluateStack(s)
        # Look the token up in the operator table; a substring test against
        # "+-*/^" would also accept tokens that are not operators.
        if op in self.opn:
            # Operands were pushed left-to-right, so the right one pops first.
            op2 = self.evaluateStack(s)
            op1 = self.evaluateStack(s)
            return self.opn[op](op1, op2)
        if op == "PI":
            return math.pi
        if op == "E":
            return math.e
        if op in self.fn:
            return self.fn[op](self.evaluateStack(s))
        if op[0].isalpha():
            # An unknown function name must not silently evaluate to 0.
            raise ValueError(f"invalid identifier '{op}'")
        return float(op)

    def eval(self, num_string, parseAll=True):
        """Parse `num_string` and return the value of the expression.

        Args:
            num_string: the arithmetic expression to evaluate.
            parseAll: passed to pyparsing's `parseString`.
        """
        self.exprStack = []
        # Parsing fills self.exprStack through the parse actions.
        self.bnf.parseString(num_string, parseAll)
        # Evaluate a copy so that the parsed stack stays available.
        return self.evaluateStack(self.exprStack[:])
| 1410 | + |
| 1411 | + |
class maths(base.Transformer):
    """Transformer whose value is the result of an arithmetic operation.

    The operation is a format string in which row keys are substituted
    before the resulting expression is evaluated by a `NumericStringParser`.
    """

    class ValueMaker(make_value.ValueMaker):
        """Value maker formatting `operation` with the row and evaluating it."""

        def __init__(self,
                raise_errors: bool = True,
                operation: str = None,
                nsp = None
            ):
            """
            Args:
                raise_errors: forwarded to the parent ValueMaker.
                operation: A format string assembling the column names in an arithmetic operation.
                nsp: the expression parser to use; a new NumericStringParser is created when None.
            """
            self.operation = operation
            # Build the parser per instance: a default argument would be
            # created once at class definition and shared (with its mutable
            # expression stack) by every ValueMaker.
            self.nsp = NumericStringParser() if nsp is None else nsp
            super().__init__(raise_errors)

        def __call__(self, columns, row, i):
            """Yield the value of the operation evaluated on `row`.

            Nothing is yielded when the evaluation fails and `self.error`
            returns instead of raising.
            """
            # Fallback for the error message in case formatting itself fails.
            eq = self.operation
            try:
                eq = self.operation.format_map(row)
                result = self.nsp.eval(eq)
            except KeyError as err:
                self.error(f"{err}, available keys:\n{row}",
                    exception = exceptions.TransformerConfigError,
                    index = i,
                    section = "maths"
                )
                # NOTE(review): presumably reached only when errors are not
                # raised; there is then no result to yield.
                return
            except Exception as err:
                self.error(f"{err}, while evaluating operation: {eq}",
                    exception = exceptions.TransformerDataError,
                    index = i,
                    section = "maths"
                )
                return

            yield result

    def __init__(self,
            properties_of,
            label_maker = None,
            branching_properties = None,
            columns=None,
            output_validator: validate.OutputValidator = None,
            multi_type_dict = None,
            raise_errors = True,
            operation = None,
            **kwargs
        ):
        """
        Initialize the maths transformer.

        Args:
            properties_of: Properties of the node.
            label_maker: the LabelMaker object used for handling the creation of the output of the transformer. Default is None.
            branching_properties: in case of branching on cell values, the dictionary holding the properties for each branch.
            columns: The columns to be processed.
            output_validator: the OutputValidator object used for validating transformer output.
            multi_type_dict: the dictionary holding regex patterns for node and edge type branching based on cell values.
            raise_errors: if True, the caller is asking for raising exceptions when an error occurs
            operation: A format string assembling the column names in an arithmetic operation.
        """

        self.value_maker = self.ValueMaker(
            raise_errors=raise_errors,
            operation=operation
        )

        super().__init__(properties_of,
                         self.value_maker,
                         label_maker,
                         branching_properties,
                         columns,
                         output_validator,
                         multi_type_dict,
                         raise_errors=raise_errors,
                         **kwargs
                        )

        # Checked after the parent initialisation, as in the original order.
        if not operation:  # Neither empty string nor None.
            self.error(f"The `operation` parameter of the `{type(self).__name__}` transformer cannot be an empty string.")
| 1490 | + |
0 commit comments