Evaluate Math Equations From Unsafe User Input In Python
Solution 1:
Disclaimer: I'm the Alexer mentioned in the code in the other answer. To be honest, I kind of suggested the bytecode parsing approach only half-jokingly, since I happened to have 99% of the code lying around for an unrelated project and so could whip together a POC in like a couple of minutes. That said, there shouldn't be anything wrong with it, per se; it's just that it's a more complex machinery that is needed for this task. In fact, you should be able to get away with just disassembling the code [checking the opcodes against a whitelist], checking that the constants and names are valid, and executing it with plain, evil eval after that. You should just lose the ability to insert paranoid extra checks all over the execution. (Another disclaimer: I still wouldn't feel comfortable enough to do it with eval)
Anyway, I had a boring moment, so I wrote some code to do this the smart way; using the AST instead of the bytecode. It's just an extra flag to compile()
. (Or just ast.parse()
, since you'll want the types from the module anyway)
import ast
import operator
_operations = {
ast.Add: operator.add,
ast.Sub: operator.sub,
ast.Mult: operator.mul,
ast.Div: operator.div,
ast.Pow: operator.pow,
}
def_safe_eval(node, variables, functions):
ifisinstance(node, ast.Num):
return node.n
elifisinstance(node, ast.Name):
return variables[node.id] # KeyError -> Unsafe variableelifisinstance(node, ast.BinOp):
op = _operations[node.op.__class__] # KeyError -> Unsafe operation
left = _safe_eval(node.left, variables, functions)
right = _safe_eval(node.right, variables, functions)
ifisinstance(node.op, ast.Pow):
assert right < 100return op(left, right)
elifisinstance(node, ast.Call):
assertnot node.keywords andnot node.starargs andnot node.kwargs
assertisinstance(node.func, ast.Name), 'Unsafe function derivation'
func = functions[node.func.id] # KeyError -> Unsafe function
args = [_safe_eval(arg, variables, functions) for arg in node.args]
return func(*args)
assertFalse, 'Unsafe operation'defsafe_eval(expr, variables={}, functions={}):
node = ast.parse(expr, '<string>', 'eval').body
return _safe_eval(node, variables, functions)
if __name__ == '__main__':
import math
print safe_eval('sin(a*pi/b)', dict(a=1, b=2, pi=math.pi), dict(sin=math.sin))
The same thing applies to this as to the bytecode version; if you check the operations against a whitelist and check that the names and values are valid, you should be able to get away with calling eval on the AST. (But again, I still wouldn't do it. Because paranoid. And paranoia is good when eval is concerned)
Solution 2:
There is a relatively easy of doing this in Python without third party packages.
Using
compile()
to prepare a single-line Python expression to be bytecode foreval()
Not running the bytecode through
eval()
, but instead run it in your custom opcode loop and only implement opcodes which you really need. E.g. no built-ins, no attribute access, so the sandbox cannot escaped.
However there are some gotchas, like preparing for CPU exhaustion and memory exhaustion, which are not specific to this method and are issue on other approaches too.
Here is a full blog post about the topic. Here is a related gist. Below is shortened sample code.
""""
The orignal author: Alexer / #python.fi
"""import opcode
import dis
import sys
import multiprocessing
import time
# Python 3 requiredassert sys.version_info[0] == 3, "No country for old snakes"classUnknownSymbol(Exception):
""" There was a function or constant in the expression we don't support. """classBadValue(Exception):
""" The user tried to input dangerously big value. """
MAX_ALLOWED_VALUE = 2**63classBadCompilingInput(Exception):
""" The user tried to input something which might cause compiler to slow down. """defdisassemble(co):
""" Loop through Python bytecode and match instructions with our internal opcodes.
:param co: Python code object
"""
code = co.co_code
n = len(code)
i = 0
extended_arg = 0
result = []
while i < n:
op = code[i]
curi = i
i = i+1if op >= dis.HAVE_ARGUMENT:
# Python 2# oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
oparg = code[i] + code[i+1] * 256 + extended_arg
extended_arg = 0
i = i+2if op == dis.EXTENDED_ARG:
# Python 2#extended_arg = oparg*65536L
extended_arg = oparg*65536else:
oparg = None# print(opcode.opname[op])
opv = globals()[opcode.opname[op].replace('+', '_')](co, curi, i, op, oparg)
result.append(opv)
return result
# For the opcodes see dis.py# (Copy-paste)# https://docs.python.org/2/library/dis.htmlclassOpcode:
""" Base class for out internal opcodes. """
args = 0
pops = 0
pushes = 0def__init__(self, co, i, nexti, op, oparg):
self.co = co
self.i = i
self.nexti = nexti
self.op = op
self.oparg = oparg
defget_pops(self):
return self.pops
defget_pushes(self):
return self.pushes
deftouch_value(self, stack, frame):
assert self.pushes == 0for i inrange(self.pops):
stack.pop()
classOpcodeArg(Opcode):
args = 1classOpcodeConst(OpcodeArg):
defget_arg(self):
return self.co.co_consts[self.oparg]
classOpcodeName(OpcodeArg):
defget_arg(self):
return self.co.co_names[self.oparg]
classPOP_TOP(Opcode):
"""Removes the top-of-stack (TOS) item."""
pops = 1deftouch_value(self, stack, frame):
stack.pop()
classDUP_TOP(Opcode):
"""Duplicates the reference on top of the stack."""# XXX: +-1
pops = 1
pushes = 2deftouch_value(self, stack, frame):
stack[-1:] = 2 * stack[-1:]
classROT_TWO(Opcode):
"""Swaps the two top-most stack items."""
pops = 2
pushes = 2deftouch_value(self, stack, frame):
stack[-2:] = stack[-2:][::-1]
classROT_THREE(Opcode):
"""Lifts second and third stack item one position up, moves top down to position three."""
pops = 3
pushes = 3
direct = Truedeftouch_value(self, stack, frame):
v3, v2, v1 = stack[-3:]
stack[-3:] = [v1, v3, v2]
classROT_FOUR(Opcode):
"""Lifts second, third and forth stack item one position up, moves top down to position four."""
pops = 4
pushes = 4
direct = Truedeftouch_value(self, stack, frame):
v4, v3, v2, v1 = stack[-3:]
stack[-3:] = [v1, v4, v3, v2]
classUNARY(Opcode):
"""Unary Operations take the top of the stack, apply the operation, and push the result back on the stack."""
pops = 1
pushes = 1classUNARY_POSITIVE(UNARY):
"""Implements TOS = +TOS."""deftouch_value(self, stack, frame):
stack[-1] = +stack[-1]
classUNARY_NEGATIVE(UNARY):
"""Implements TOS = -TOS."""deftouch_value(self, stack, frame):
stack[-1] = -stack[-1]
classBINARY(Opcode):
"""Binary operations remove the top of the stack (TOS) and the second top-most stack item (TOS1) from the stack. They perform the operation, and put the result back on the stack."""
pops = 2
pushes = 1classBINARY_POWER(BINARY):
"""Implements TOS = TOS1 ** TOS."""deftouch_value(self, stack, frame):
TOS1, TOS = stack[-2:]
print(TOS1, TOS)
ifabs(TOS1) > BadValue.MAX_ALLOWED_VALUE orabs(TOS) > BadValue.MAX_ALLOWED_VALUE:
raise BadValue("The value for exponent was too big")
stack[-2:] = [TOS1 ** TOS]
classBINARY_MULTIPLY(BINARY):
"""Implements TOS = TOS1 * TOS."""deftouch_value(self, stack, frame):
TOS1, TOS = stack[-2:]
stack[-2:] = [TOS1 * TOS]
classBINARY_DIVIDE(BINARY):
"""Implements TOS = TOS1 / TOS when from __future__ import division is not in effect."""deftouch_value(self, stack, frame):
TOS1, TOS = stack[-2:]
stack[-2:] = [TOS1 / TOS]
classBINARY_MODULO(BINARY):
"""Implements TOS = TOS1 % TOS."""deftouch_value(self, stack, frame):
TOS1, TOS = stack[-2:]
stack[-2:] = [TOS1 % TOS]
classBINARY_ADD(BINARY):
"""Implements TOS = TOS1 + TOS."""deftouch_value(self, stack, frame):
TOS1, TOS = stack[-2:]
stack[-2:] = [TOS1 + TOS]
classBINARY_SUBTRACT(BINARY):
"""Implements TOS = TOS1 - TOS."""deftouch_value(self, stack, frame):
TOS1, TOS = stack[-2:]
stack[-2:] = [TOS1 - TOS]
classBINARY_FLOOR_DIVIDE(BINARY):
"""Implements TOS = TOS1 // TOS."""deftouch_value(self, stack, frame):
TOS1, TOS = stack[-2:]
stack[-2:] = [TOS1 // TOS]
classBINARY_TRUE_DIVIDE(BINARY):
"""Implements TOS = TOS1 / TOS when from __future__ import division is in effect."""deftouch_value(self, stack, frame):
TOS1, TOS = stack[-2:]
stack[-2:] = [TOS1 / TOS]
classBINARY_LSHIFT(BINARY):
"""Implements TOS = TOS1 << TOS."""deftouch_value(self, stack, frame):
TOS1, TOS = stack[-2:]
stack[-2:] = [TOS1 << TOS]
classBINARY_RSHIFT(BINARY):
"""Implements TOS = TOS1 >> TOS."""deftouch_value(self, stack, frame):
TOS1, TOS = stack[-2:]
stack[-2:] = [TOS1 >> TOS]
classBINARY_AND(BINARY):
"""Implements TOS = TOS1 & TOS."""deftouch_value(self, stack, frame):
TOS1, TOS = stack[-2:]
stack[-2:] = [TOS1 & TOS]
classBINARY_XOR(BINARY):
"""Implements TOS = TOS1 ^ TOS."""deftouch_value(self, stack, frame):
TOS1, TOS = stack[-2:]
stack[-2:] = [TOS1 ^ TOS]
classBINARY_OR(BINARY):
"""Implements TOS = TOS1 | TOS."""deftouch_value(self, stack, frame):
TOS1, TOS = stack[-2:]
stack[-2:] = [TOS1 | TOS]
classRETURN_VALUE(Opcode):
"""Returns with TOS to the caller of the function."""
pops = 1
final = Truedeftouch_value(self, stack, frame):
value = stack.pop()
return value
classLOAD_CONST(OpcodeConst):
"""Pushes co_consts[consti] onto the stack."""# consti
pushes = 1deftouch_value(self, stack, frame):
# XXX moo: Validate type
value = self.get_arg()
assertisinstance(value, (int, float))
stack.append(value)
classLOAD_NAME(OpcodeName):
"""Pushes the value associated with co_names[namei] onto the stack."""# namei
pushes = 1deftouch_value(self, stack, frame):
# XXX moo: Get name from dict of valid variables/functions
name = self.get_arg()
if name notin frame:
raise UnknownSymbol("Does not know symbol {}".format(name))
stack.append(frame[name])
classCALL_FUNCTION(OpcodeArg):
"""Calls a function. The low byte of argc indicates the number of positional parameters, the high byte the number of keyword parameters. On the stack, the opcode finds the keyword parameters first. For each keyword argument, the value is on top of the key. Below the keyword parameters, the positional parameters are on the stack, with the right-most parameter on top. Below the parameters, the function object to call is on the stack. Pops all function arguments, and the function itself off the stack, and pushes the return value."""# argc
pops = None
pushes = 1defget_pops(self):
args = self.oparg & 0xff
kwargs = (self.oparg >> 8) & 0xffreturn1 + args + 2 * kwargs
deftouch_value(self, stack, frame):
argc = self.oparg & 0xff
kwargc = (self.oparg >> 8) & 0xffassert kwargc == 0if argc > 0:
args = stack[-argc:]
stack[:] = stack[:-argc]
else:
args = []
func = stack.pop()
assert func in frame.values(), "Uh-oh somebody injected bad function. This does not happen."
result = func(*args)
stack.append(result)
defcheck_for_pow(expr):
""" Python evaluates power operator during the compile time if its on constants.
You can do CPU / memory burning attack with ``2**999999999999999999999**9999999999999``.
We mainly care about memory now, as we catch timeoutting in any case.
We just disable pow and do not care about it.
"""if"**"in expr:
raise BadCompilingInput("Power operation is not allowed")
def_safe_eval(expr, functions_and_constants={}, check_compiling_input=True):
""" Evaluate a Pythonic math expression and return the output as a string.
The expr is limited to 1024 characters / 1024 operations
to prevent CPU burning or memory stealing.
:param functions_and_constants: Supplied "built-in" data for evaluation
"""# Some safety checksassertlen(expr) < 1024# Check for potential bad compiler inputif check_compiling_input:
check_for_pow(expr)
# Compile Python source code to Python code for eval()
code = compile(expr, '', 'eval')
# Dissect bytecode back to Python opcodes
ops = disassemble(code)
assertlen(ops) < 1024
stack = []
for op in ops:
value = op.touch_value(stack, functions_and_constants)
return value
Post a Comment for "Evaluate Math Equations From Unsafe User Input In Python"