Math: Rewrite calc functions with a proper evaluator.

progval · progval · commit 3848ae78de45 · 2019-11-09T15:49:31.000+01:00
Instead of hacking around eval(), which everyone knows is a bad idea
even with prior expression sanitizing.
diff --git a/plugins/Math/evaluator.py b/plugins/Math/evaluator.py
@@ -0,0 +1,169 @@
+###
+# Copyright (c) 2019, Valentin Lorentz
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#   * Redistributions of source code must retain the above copyright notice,
+#     this list of conditions, and the following disclaimer.
+#   * Redistributions in binary form must reproduce the above copyright notice,
+#     this list of conditions, and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#   * Neither the name of the author of this software nor the name of
+#     contributors to this software may be used to endorse or promote products
+#     derived from this software without specific prior written consent.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+###
+
+import ast
+import math
+import cmath
+import operator
+
+class InvalidNode(Exception):
+    """Raised by the evaluator for an operator or AST construct it does not
+    support."""
+
+def filter_module(module, safe_names):
+    """Returns a dict mapping every name of `safe_names` that `module`
+    defines to the matching attribute. Names the module lacks are skipped
+    rather than reported."""
+    exported = {}
+    for name in safe_names:
+        if hasattr(module, name):
+            exported[name] = getattr(module, name)
+    return exported
+
+# Unary operators the evaluator accepts, keyed by AST operator class.
+UNARY_OPS = {
+    # Unary plus hands the operand back untouched.
+    ast.UAdd: lambda operand: operand,
+    ast.USub: operator.neg,
+}
+
+# Binary operators the evaluator accepts, keyed by AST operator class.
+# Any operator missing from this table is reported as illegal.
+BIN_OPS = {
+    # Arithmetic
+    ast.Add: operator.add,
+    ast.Sub: operator.sub,
+    ast.Mult: operator.mul,
+    ast.Div: operator.truediv,
+    ast.Pow: operator.pow,
+    # Bitwise
+    ast.BitAnd: operator.and_,
+    ast.BitOr: operator.or_,
+    ast.BitXor: operator.xor,
+}
+
+# Names of the constants exported from the math module.
+MATH_CONSTANTS = ['e', 'inf', 'nan', 'pi', 'tau']
+
+# Names of the math-module functions exposed to expressions.
+SAFE_MATH_FUNCTIONS = '''
+    acos acosh asin asinh atan atan2 atanh copysign cos cosh degrees erf
+    erfc exp expm1 fabs fmod frexp fsum gamma hypot ldexp lgamma log log10
+    log1p log2 modf pow radians remainder sin sinh tan tanh
+'''.split()
+
+# Names exported from the cmath module (functions plus a few constants).
+SAFE_CMATH_FUNCTIONS = '''
+    acos acosh asin asinh atan atanh cos cosh exp inf infj log log10
+    nanj phase polar rect sin sinh tan tanh tau
+'''.split()
+
+# cmath is applied second, so for names that both modules define (acos, exp,
+# log, sin, ...) the cmath version is the one that ends up in SAFE_ENV.
+SAFE_ENV = {}
+SAFE_ENV.update(filter_module(math, MATH_CONSTANTS + SAFE_MATH_FUNCTIONS))
+SAFE_ENV.update(filter_module(cmath, SAFE_CMATH_FUNCTIONS))
+
+def _sqrt(x):
+    """Square root that switches to cmath for complex or negative input and
+    uses math.sqrt otherwise."""
+    needs_complex = isinstance(x, complex) or x < 0
+    impl = cmath.sqrt if needs_complex else math.sqrt
+    return impl(x)
+
+def _cbrt(x):
+    """Returns the real cube root of the real number `x`.
+
+    math.pow() raises ValueError for a negative base with a fractional
+    exponent, so the root is computed on the magnitude and the sign of `x`
+    is restored afterwards. Complex arguments still raise TypeError.
+    """
+    return math.copysign(math.pow(abs(x), 1.0/3), x)
+
+def _factorial(x):
+    """Returns x! as a float, refusing arguments above 10000.
+
+    Floats holding an integral value are accepted: when ints are not
+    allowed, every literal reaches this function as a float, and
+    math.factorial() does not accept floats on recent Python versions.
+
+    Raises ValueError for non-integral or negative arguments, Exception
+    when the argument is above 10000, and OverflowError when the result
+    does not fit in a float.
+    """
+    # Written as "not <=" so that NaN is rejected here, as it was before.
+    if not x <= 10000:
+        raise Exception('factorial argument too large')
+    if isinstance(x, float):
+        if not x.is_integer():
+            raise ValueError('factorial() only accepts integral values')
+        x = int(x)
+    return float(math.factorial(x))
+
+# Extra names layered on top of the math/cmath exports.
+SAFE_ENV.update(
+    i=1j,
+    abs=abs,
+    max=max,
+    min=min,
+    round=lambda x, y=0: round(x, int(y)),
+    factorial=_factorial,
+    sqrt=_sqrt,
+    cbrt=_cbrt,
+    # Wrapped so that the result is a float instead of an int.
+    ceil=lambda x: float(math.ceil(x)),
+    floor=lambda x: float(math.floor(x)),
+)
+
+# Same environment, except that ceil, floor and factorial are the plain math
+# functions (which return integers), and gcd is available as well.
+UNSAFE_ENV = dict(SAFE_ENV)
+UNSAFE_ENV.update(
+    filter_module(math, ['ceil', 'floor', 'factorial', 'gcd']))
+
+
+
+
+# It would be nice if ast.literal_eval used a visitor so we could subclass
+# to extend it, but it doesn't, so let's reimplement it entirely.
+class SafeEvalVisitor(ast.NodeVisitor):
+    """Evaluates a restricted subset of Python expression syntax.
+
+    Numeric literals, names found in the evaluation environment, calls with
+    positional arguments, and the operators listed in UNARY_OPS and BIN_OPS
+    are accepted; any other node raises InvalidNode.
+
+    If `allow_ints` is false, names are looked up in SAFE_ENV and literals
+    are converted to float/complex. Otherwise names are looked up in
+    UNSAFE_ENV and literals are used as written.
+    """
+    def __init__(self, allow_ints):
+        self._allow_ints = allow_ints
+        self._env = UNSAFE_ENV if allow_ints else SAFE_ENV
+
+    def _convert_num(self, x):
+        """Converts numbers to float/complex if ints are not allowed."""
+        if self._allow_ints:
+            return x
+        # complex() raises OverflowError for ints too large for a float.
+        x = complex(x)
+        if x.imag == 0:
+            # .real is already a float. It must not be round-tripped through
+            # '%.16f': that keeps only 16 digits after the decimal point, so
+            # small magnitudes such as 1e-20 would collapse to 0.0.
+            return x.real
+        return x
+
+    def visit_Expression(self, node):
+        return self.visit(node.body)
+
+    def visit_Constant(self, node):
+        # Literals are ast.Constant nodes since Python 3.8. Only numbers are
+        # accepted; bool is a subclass of int, so it is excluded explicitly.
+        value = node.value
+        if isinstance(value, bool) or \
+                not isinstance(value, (int, float, complex)):
+            return self.generic_visit(node)
+        return self._convert_num(value)
+
+    def visit_Num(self, node):
+        # Numeric literals on Python versions older than 3.8.
+        return self._convert_num(node.n)
+
+    def visit_Name(self, node):
+        # Lookups are case-insensitive; the error keeps the original spelling.
+        id_ = node.id.lower()
+        if id_ in self._env:
+            return self._env[id_]
+        else:
+            raise NameError(node.id)
+
+    def visit_Call(self, node):
+        if node.keywords:
+            # Keyword arguments are not supported. Reject them instead of
+            # silently dropping them and computing something else.
+            return self.generic_visit(node.keywords[0])
+        # The callee is resolved first, so an unknown function is reported
+        # before any problem in its arguments.
+        func = self.visit(node.func)
+        args = [self.visit(arg) for arg in node.args]
+        return func(*args)
+
+    def visit_UnaryOp(self, node):
+        op = UNARY_OPS.get(node.op.__class__)
+        if op:
+            return op(self.visit(node.operand))
+        else:
+            raise InvalidNode('illegal operator %s' % node.op.__class__.__name__)
+
+    def visit_BinOp(self, node):
+        op = BIN_OPS.get(node.op.__class__)
+        if op:
+            return op(self.visit(node.left), self.visit(node.right))
+        else:
+            raise InvalidNode('illegal operator %s' % node.op.__class__.__name__)
+
+    def generic_visit(self, node):
+        # Reached for every node type without a dedicated visit_* method.
+        raise InvalidNode('illegal construct %s' % node.__class__.__name__)
+
+
+def safe_eval(text, allow_ints):
+    """Parses `text` as a single expression and evaluates it with
+    SafeEvalVisitor. `allow_ints` is handed to the visitor unchanged.
+    Errors raised by ast.parse or by the visitor propagate to the caller."""
+    tree = ast.parse(text, mode='eval')
+    evaluator = SafeEvalVisitor(allow_ints)
+    return evaluator.visit(tree)
diff --git a/plugins/Math/plugin.py b/plugins/Math/plugin.py
@@ -44,6 +44,7 @@
 _ = PluginInternationalization('Math')
 
 from .local import convertcore
+from .evaluator import safe_eval, InvalidNode, SAFE_ENV
 
 baseArg = ('int', 'base', lambda i: i <= 36)
 
@@ -97,36 +98,6 @@ def _convertBaseToBase(self, number, toBase, fromBase):
             return str(number)
         return self._convertDecimalToBase(number, toBase)
 
-    _mathEnv = {'__builtins__': types.ModuleType('__builtins__'), 'i': 1j}
-    _mathEnv.update(math.__dict__)
-    _mathEnv.update(cmath.__dict__)
-    def _sqrt(x):
-        if isinstance(x, complex) or x < 0:
-            return cmath.sqrt(x)
-        else:
-            return math.sqrt(x)
-    def _cbrt(x):
-        return math.pow(x, 1.0/3)
-    def _factorial(x):
-        if x<=10000:
-            return float(math.factorial(x))
-        else:
-            raise Exception('factorial argument too large')
-    _mathEnv['sqrt'] = _sqrt
-    _mathEnv['cbrt'] = _cbrt
-    _mathEnv['abs'] = abs
-    _mathEnv['max'] = max
-    _mathEnv['min'] = min
-    _mathEnv['round'] = lambda x, y=0: round(x, int(y))
-    _mathSafeEnv = dict([(x,y) for x,y in _mathEnv.items()])
-    _mathSafeEnv['factorial'] = _factorial
-    _mathRe = re.compile(r'((?:(?<![A-Fa-f\d)])-)?'
-                         r'(?:0x[A-Fa-f\d]+|'
-                         r'0[0-7]+|'
-                         r'\d+\.\d+|'
-                         r'\.\d+|'
-                         r'\d+\.|'
-                         r'\d+))')
     def _floatToString(self, x):
         if -1e-10 < x < 1e-10:
             return '0'
@@ -157,17 +128,6 @@ def _complexToString(self, x):
         else:
             return '%s%s' % (realS, imagS)
 
-    _calc_match_forbidden_chars = re.compile('[_\[\]]')
-    _calc_remover = utils.str.MultipleRemover('_[] \t')
-    ###
-    # So this is how the 'calc' command works:
-    # First, we make a nice little safe environment for evaluation; basically,
-    # the names in the 'math' and 'cmath' modules.  Then, we remove the ability
-    # of a random user to get ints evaluated: this means we have to turn all
-    # int literals (even octal numbers and hexadecimal numbers) into floats.
-    # Then we delete all square brackets, underscores, and whitespace, so no
-    # one can do list comprehensions or call __...__ functions.
-    ###
     @internationalizeDocstring
     def calc(self, irc, msg, args, text):
         """<math expression>
@@ -178,57 +138,17 @@ def calc(self, irc, msg, args, text):
         crash to the bot with something like '10**10**10**10'.  One consequence
         is that large values such as '10**24' might not be exact.
         """
-        try:
-            text = str(text)
-        except UnicodeEncodeError:
-            irc.error(_("There's no reason you should have fancy non-ASCII "
-                            "characters in your mathematical expression. "
-                            "Please remove them."))
-            return
-        if self._calc_match_forbidden_chars.match(text):
-            # Note: this is important to keep this to forbid usage of
-            # __builtins__
-            irc.error(_('There\'s really no reason why you should have '
-                           'underscores or brackets in your mathematical '
-                           'expression.  Please remove them.'))
-            return
-        text = self._calc_remover(text)
-        if 'lambda' in text:
-            irc.error(_('You can\'t use lambda in this command.'))
-            return
-        text = text.lower()
-        def handleMatch(m):
-            s = m.group(1)
-            if s.startswith('0x'):
-                i = int(s, 16)
-            elif s.startswith('0') and '.' not in s:
-                try:
-                    i = int(s, 8)
-                except ValueError:
-                    i = int(s)
-            else:
-                i = float(s)
-            x = complex(i)
-            if x.imag == 0:
-                x = x.real
-                # Need to use string-formatting here instead of str() because
-                # use of str() on large numbers loses information:
-                # str(float(33333333333333)) => '3.33333333333e+13'
-                # float('3.33333333333e+13') => 33333333333300.0
-                return '%.16f' % x
-            return str(x)
-        text = self._mathRe.sub(handleMatch, text)
         try:
             self.log.info('evaluating %q from %s', text, msg.prefix)
-            x = complex(eval(text, self._mathSafeEnv, self._mathSafeEnv))
+            x = complex(safe_eval(text, allow_ints=False))
             irc.reply(self._complexToString(x))
         except OverflowError:
             maxFloat = math.ldexp(0.9999999999999999, 1024)
             irc.error(_('The answer exceeded %s or so.') % maxFloat)
-        except TypeError:
-            irc.error(_('Something in there wasn\'t a valid number.'))
+        except InvalidNode as e:
+            irc.error(_('Invalid syntax: %s') % e.args[0])
         except NameError as e:
-            irc.error(_('%s is not a defined function.') % str(e).split()[1])
+            irc.error(_('%s is not a defined function.') % e.args[0])
         except Exception as e:
             irc.error(str(e))
     calc = wrap(calc, ['text'])
@@ -241,28 +161,15 @@ def icalc(self, irc, msg, args, text):
         math, and can thus cause the bot to suck up CPU.  Hence it requires
         the 'trusted' capability to use.
         """
-        if self._calc_match_forbidden_chars.match(text):
-            # Note: this is important to keep this to forbid usage of
-            # __builtins__
-            irc.error(_('There\'s really no reason why you should have '
-                           'underscores or brackets in your mathematical '
-                           'expression.  Please remove them.'))
-            return
-        # This removes spaces, too, but we'll leave the removal of _[] for
-        # safety's sake.
-        text = self._calc_remover(text)
-        if 'lambda' in text:
-            irc.error(_('You can\'t use lambda in this command.'))
-            return
-        text = text.replace('lambda', '')
         try:
             self.log.info('evaluating %q from %s', text, msg.prefix)
-            irc.reply(str(eval(text, self._mathEnv, self._mathEnv)))
+            x = safe_eval(text, allow_ints=True)
+            irc.reply(str(x))
         except OverflowError:
             maxFloat = math.ldexp(0.9999999999999999, 1024)
             irc.error(_('The answer exceeded %s or so.') % maxFloat)
-        except TypeError:
-            irc.error(_('Something in there wasn\'t a valid number.'))
+        except InvalidNode as e:
+            irc.error(_('Invalid syntax: %s') % e.args[0])
         except NameError as e:
             irc.error(_('%s is not a defined function.') % str(e).split()[1])
         except Exception as e:
@@ -286,8 +193,8 @@ def rpn(self, irc, msg, args):
                     x = abs(x)
                 stack.append(x)
             except ValueError: # Not a float.
-                if arg in self._mathSafeEnv:
-                    f = self._mathSafeEnv[arg]
+                if arg in SAFE_ENV:
+                    f = SAFE_ENV[arg]
                     if callable(f):
                         called = False
                         arguments = []
@@ -310,7 +217,7 @@ def rpn(self, irc, msg, args):
                     arg1 = stack.pop()
                     s = '%s%s%s' % (arg1, arg, arg2)
                     try:
-                        stack.append(eval(s, self._mathSafeEnv, self._mathSafeEnv))
+                        stack.append(safe_eval(s, allow_ints=False))
                     except SyntaxError:
                         irc.error(format(_('%q is not a defined function.'),
                                          arg))
diff --git a/plugins/Math/test.py b/plugins/Math/test.py
@@ -91,9 +91,6 @@ def testBase(self):
         self.assertError('base 4 4')
         self.assertError('base 10 12 A')
 
-        print()
-        print("If we have not fixed a bug with Math.base, the following ")
-        print("tests will hang the test-suite.")
         self.assertRegexp('base 2 10 [base 10 2 -12]', '-12')
         self.assertRegexp('base 16 2 [base 2 16 -110101]', '-110101')
 
@@ -117,7 +114,10 @@ def testCalc(self):
         self.assertError('calc factorial(20000)')
 
     def testCalcNoNameError(self):
-        self.assertNotRegexp('calc foobar(x)', 'NameError')
+        self.assertRegexp('calc foobar(x)', 'foobar is not a defined function')
+
+    def testCalcInvalidNode(self):
+        self.assertRegexp('calc {"foo": "bar"}', 'Illegal construct Dict')
 
     def testCalcImaginary(self):
         self.assertResponse('calc 3 + sqrt(-1)', '3+i')