Ausgabe der neuen DB Einträge

This commit is contained in:
hubobel 2022-01-02 21:50:48 +01:00
parent bad48e1627
commit cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions

View file

@ -0,0 +1,23 @@
import warnings
import sys
from jmespath import parser
from jmespath.visitor import Options
__version__ = '0.10.0'
if sys.version_info[:2] <= (2, 6) or ((3, 0) <= sys.version_info[:2] <= (3, 3)):
python_ver = '.'.join(str(x) for x in sys.version_info[:3])
warnings.warn(
'You are using Python {0}, which will no longer be supported in '
'version 0.11.0'.format(python_ver),
DeprecationWarning)
def compile(expression):
    """Parse ``expression`` and return what ``Parser.parse`` produces for it."""
    expression_parser = parser.Parser()
    return expression_parser.parse(expression)
def search(expression, data, options=None):
    """Parse ``expression`` and evaluate it against ``data``.

    ``options`` is forwarded unchanged to the parsed result's ``search``.
    """
    parsed = parser.Parser().parse(expression)
    return parsed.search(data, options=options)

View file

@ -0,0 +1,90 @@
# AST nodes have this structure:
# {"type": "<node type>", "children": [], "value": ""}
def comparator(name, first, second):
    """Build a ``comparator`` node named ``name`` over two operand nodes."""
    operands = [first, second]
    return dict(type='comparator', children=operands, value=name)
def current_node():
    """Build a ``current`` node; it has no children and no value."""
    return dict(type='current', children=[])
def expref(expression):
    """Build an ``expref`` node wrapping a single expression node."""
    wrapped = [expression]
    return dict(type='expref', children=wrapped)
def function_expression(name, args):
    """Build a ``function_expression`` node.

    ``args`` is used directly as the node's children list (not copied) and
    ``name`` becomes the node's value.
    """
    return dict(type='function_expression', children=args, value=name)
def field(name):
    """Build a childless ``field`` node whose value is ``name``."""
    return dict(type='field', children=[], value=name)
def filter_projection(left, right, comparator):
    """Build a ``filter_projection`` node.

    Children are stored in the order ``left``, ``right``, ``comparator``.
    """
    ordered_children = [left, right, comparator]
    return dict(type='filter_projection', children=ordered_children)
def flatten(node):
    """Build a ``flatten`` node with ``node`` as its only child."""
    only_child = [node]
    return dict(type='flatten', children=only_child)
def identity():
    """Build an ``identity`` node; it has no children and no value."""
    return dict(type='identity', children=[])
def index(index):
    """Build a childless ``index`` node whose value is ``index``."""
    return dict(type='index', value=index, children=[])
def index_expression(children):
    """Build an ``index_expression`` node using ``children`` as given."""
    return dict(type='index_expression', children=children)
def key_val_pair(key_name, node):
    """Build a ``key_val_pair`` node: ``key_name`` is the value, ``node`` the child."""
    only_child = [node]
    return dict(type='key_val_pair', children=only_child, value=key_name)
def literal(literal_value):
    """Build a childless ``literal`` node carrying ``literal_value``."""
    return dict(type='literal', value=literal_value, children=[])
def multi_select_dict(nodes):
    """Build a ``multi_select_dict`` node using ``nodes`` as its children."""
    return dict(type='multi_select_dict', children=nodes)
def multi_select_list(nodes):
    """Build a ``multi_select_list`` node using ``nodes`` as its children."""
    return dict(type='multi_select_list', children=nodes)
def or_expression(left, right):
    """Build an ``or_expression`` node over ``left`` and ``right``."""
    operands = [left, right]
    return dict(type='or_expression', children=operands)
def and_expression(left, right):
    """Build an ``and_expression`` node over ``left`` and ``right``."""
    operands = [left, right]
    return dict(type='and_expression', children=operands)
def not_expression(expr):
    """Build a ``not_expression`` node with ``expr`` as its only child."""
    only_child = [expr]
    return dict(type='not_expression', children=only_child)
def pipe(left, right):
    """Build a ``pipe`` node over ``left`` and ``right``."""
    operands = [left, right]
    return dict(type='pipe', children=operands)
def projection(left, right):
    """Build a ``projection`` node over ``left`` and ``right``."""
    operands = [left, right]
    return dict(type='projection', children=operands)
def subexpression(children):
    """Build a ``subexpression`` node using ``children`` as given (not copied)."""
    return dict(type='subexpression', children=children)
def slice(start, end, step):
    """Build a ``slice`` node whose children are ``start``, ``end``, ``step``."""
    bounds = [start, end, step]
    return dict(type='slice', children=bounds)
def value_projection(left, right):
    """Build a ``value_projection`` node over ``left`` and ``right``."""
    operands = [left, right]
    return dict(type='value_projection', children=operands)

View file

@ -0,0 +1,65 @@
import sys
import inspect
# True when the running interpreter's major version is 2.
PY2 = (sys.version_info[0] == 2)
def with_metaclass(meta, *bases):
    """Return a placeholder base class that applies ``meta`` to subclasses.

    When a class inherits from the returned placeholder, the class that is
    actually created is ``meta(name, bases, namespace)``: its metaclass is
    ``meta`` and its bases are the ``bases`` given here, not the placeholder.
    """
    # Taken from flask/six.
    class metaclass(meta):
        def __new__(cls, name, this_bases, namespace):
            # Discard the placeholder bases and build the real class.
            real_class = meta(name, bases, namespace)
            return real_class

    placeholder = type.__new__(metaclass, 'temporary_class', (), {})
    return placeholder
# Interpreter-specific aliases and helpers. Both branches define the same
# names: text_type, string_type, zip_longest, with_str_method,
# with_repr_method and get_methods.
if not PY2:
    text_type = str
    string_type = str
    from itertools import zip_longest

    def with_str_method(cls):
        """Return ``cls`` unchanged."""
        # In python3, we don't need to do anything, we return a str type.
        return cls

    def with_repr_method(cls):
        """Return ``cls`` unchanged."""
        return cls

    def get_methods(cls):
        """Yield ``(name, member)`` pairs of ``cls`` that are plain functions."""
        for member in inspect.getmembers(cls, predicate=inspect.isfunction):
            yield member
else:
    text_type = unicode
    string_type = basestring
    from itertools import izip_longest as zip_longest

    def with_str_method(cls):
        """Class decorator that handles __str__ compat between py2 and py3."""
        # In python2, the __str__ should be __unicode__
        # and __str__ should return bytes.
        cls.__unicode__ = cls.__str__

        def __str__(self):
            return self.__unicode__().encode('utf-8')

        cls.__str__ = __str__
        return cls

    def with_repr_method(cls):
        """Class decorator that handles __repr__ with py2 and py3."""
        # Almost the same as with_str_method, *except* that it uses the
        # unicode_escape encoding. To avoid encoding the value more than
        # once, only a unicode result is encoded.
        wrapped_repr = cls.__repr__

        def __repr__(self):
            text = wrapped_repr(self)
            if isinstance(text, text_type):
                return text.encode('unicode_escape')
            return text

        cls.__repr__ = __repr__
        return cls

    def get_methods(cls):
        """Yield ``(name, member)`` pairs of ``cls`` that are methods."""
        for member in inspect.getmembers(cls, predicate=inspect.ismethod):
            yield member

View file

@ -0,0 +1,122 @@
from jmespath.compat import with_str_method
class JMESPathError(ValueError):
    """Common base class for the exceptions defined in this module."""
@with_str_method
class ParseError(JMESPathError):
    """Error describing a token that could not be parsed.

    ``expression`` starts out as ``None``; whatever catches the error can
    fill in the full expression so that ``__str__`` can display it.
    """
    _ERROR_MESSAGE = 'Invalid jmespath expression'

    def __init__(self, lex_position, token_value, token_type,
                 msg=_ERROR_MESSAGE):
        super(ParseError, self).__init__(lex_position, token_value, token_type)
        self.lex_position = lex_position
        self.token_value = token_value
        # The token type is stored upper-cased.
        self.token_type = token_type.upper()
        self.msg = msg
        # Whatever catches the ParseError can fill in the full expression
        self.expression = None

    def __str__(self):
        # One extra column of padding accounts for the double quote that
        # is printed in front of the expression.
        padding = ' ' * (self.lex_position + 1)
        caret_line = padding + '^'
        template = ('%s: Parse error at column %s, '
                    'token "%s" (%s), for expression:\n"%s"\n%s')
        return template % (
            self.msg, self.lex_position, self.token_value, self.token_type,
            self.expression, caret_line)
@with_str_method
class IncompleteExpressionError(ParseError):
    """ParseError variant whose message reports an incomplete expression."""

    def set_expression(self, expression):
        """Record ``expression`` and point the error at its end.

        The position becomes ``len(expression)`` and the token type and
        value are cleared.
        """
        self.expression = expression
        self.lex_position = len(expression)
        self.token_type = None
        self.token_value = None

    def __str__(self):
        # One extra column of padding accounts for the double quote that
        # is printed in front of the expression.
        padding = ' ' * (self.lex_position + 1)
        caret_line = padding + '^'
        template = ('Invalid jmespath expression: Incomplete expression:\n'
                    '"%s"\n%s')
        return template % (self.expression, caret_line)
@with_str_method
class LexerError(ParseError):
    """ParseError raised while tokenizing an expression."""

    def __init__(self, lexer_position, lexer_value, message, expression=None):
        self.lexer_position = lexer_position
        self.lexer_value = lexer_value
        self.message = message
        # ParseError receives the message in its token_type slot.
        super(LexerError, self).__init__(lexer_position,
                                         lexer_value,
                                         message)
        # Assigned after the base initializer, which resets expression to
        # None. Whatever catches LexerError can set this.
        self.expression = expression

    def __str__(self):
        caret_line = ' ' * self.lexer_position + '^'
        details = (self.message, self.expression, caret_line)
        return 'Bad jmespath expression: %s:\n%s\n%s' % details
@with_str_method
class ArityError(ParseError):
    """Error for a function called with the wrong number of arguments."""

    def __init__(self, expected, actual, name):
        # The ParseError initializer is not invoked here; only the fields
        # below are set on the instance.
        self.expected_arity = expected
        self.actual_arity = actual
        self.function_name = name
        self.expression = None

    def __str__(self):
        noun = self._pluralize('argument', self.expected_arity)
        details = (self.expected_arity, noun,
                   self.function_name, self.actual_arity)
        return ("Expected %s %s for function %s(), "
                "received %s" % details)

    def _pluralize(self, word, count):
        """Return ``word`` unchanged when ``count`` is 1, else with an 's'."""
        if count == 1:
            return word
        return word + 's'
@with_str_method
class VariadictArityError(ArityError):
    """ArityError whose message states a minimum argument count."""

    def __str__(self):
        noun = self._pluralize('argument', self.expected_arity)
        details = (self.expected_arity, noun,
                   self.function_name, self.actual_arity)
        return ("Expected at least %s %s for function %s(), "
                "received %s" % details)
@with_str_method
class JMESPathTypeError(JMESPathError):
    """Error for a function argument whose type is not accepted."""

    def __init__(self, function_name, current_value, actual_type,
                 expected_types):
        self.function_name = function_name
        self.current_value = current_value
        self.actual_type = actual_type
        self.expected_types = expected_types

    def __str__(self):
        details = (self.function_name, self.current_value,
                   self.expected_types, self.actual_type)
        return ('In function %s(), invalid type for value: %s, '
                'expected one of: %s, received: "%s"' % details)
class EmptyExpressionError(JMESPathError):
    """Error carrying a fixed "cannot be empty" message."""

    def __init__(self):
        message = "Invalid JMESPath expression: cannot be empty."
        super(EmptyExpressionError, self).__init__(message)
class UnknownFunctionError(JMESPathError):
    """JMESPathError subclass with no additional behavior."""

View file

@ -0,0 +1,362 @@
import math
import json
from jmespath import exceptions
from jmespath.compat import string_type as STRING_TYPE
from jmespath.compat import get_methods, with_metaclass
# python types -> jmespath types
TYPES_MAP = {
    # Scalars.
    'bool': 'boolean',
    # Containers.
    'list': 'array',
    'dict': 'object',
    'NoneType': 'null',
    # Text.
    'unicode': 'string',
    'str': 'string',
    # Numbers.
    'float': 'number',
    'int': 'number',
    'long': 'number',
    'OrderedDict': 'object',
    # Class names that are not Python builtins.
    '_Projection': 'array',
    '_Expression': 'expref',
}
# jmespath types -> python types
REVERSE_TYPES_MAP = {
    # Each value is a tuple of Python type names for that jmespath type.
    'boolean': ('bool',),
    'array': ('list', '_Projection'),
    'object': ('dict', 'OrderedDict',),
    'null': ('NoneType',),
    'string': ('unicode', 'str'),
    'number': ('float', 'int', 'long'),
    'expref': ('_Expression',),
}
def signature(*arguments):
    """Return a decorator that stores ``arguments`` on the decorated function.

    The tuple of positional arguments is attached as the ``signature``
    attribute; the function itself is returned unchanged.
    """
    def _attach(func):
        func.signature = arguments
        return func

    return _attach
class FunctionRegistry(type):
    """Metaclass that builds ``FUNCTION_TABLE`` for each class it creates."""

    def __init__(cls, name, bases, attrs):
        # Build the table before delegating to type.__init__.
        cls._populate_function_table()
        super(FunctionRegistry, cls).__init__(name, bases, attrs)

    def _populate_function_table(cls):
        """Collect the ``_func_*`` methods that carry a ``signature``.

        The table key is the method name without the ``_func_`` prefix
        (``_func_max_by`` is registered as ``max_by``). Methods without a
        ``signature`` attribute are left out.
        """
        prefix = '_func_'
        table = {}
        for name, method in get_methods(cls):
            if name.startswith(prefix):
                declared = getattr(method, 'signature', None)
                if declared is not None:
                    table[name[len(prefix):]] = {
                        'function': method,
                        'signature': declared,
                    }
        cls.FUNCTION_TABLE = table
class Functions(with_metaclass(FunctionRegistry, object)):
    """Built-in JMESPath functions plus the argument validation around them.

    Each ``_func_<name>`` method decorated with ``@signature`` is collected
    into ``FUNCTION_TABLE`` by the ``FunctionRegistry`` metaclass and is
    invoked through ``call_function``.
    """

    # Reassigned by the metaclass with the table built from the methods below.
    FUNCTION_TABLE = {
    }

    def call_function(self, function_name, resolved_args):
        """Validate ``resolved_args`` and call the function ``function_name``.

        Raises ``exceptions.UnknownFunctionError`` when the name is not in
        ``FUNCTION_TABLE``; arity and type errors come from
        ``_validate_arguments``.
        """
        try:
            spec = self.FUNCTION_TABLE[function_name]
        except KeyError:
            raise exceptions.UnknownFunctionError(
                "Unknown function: %s()" % function_name)
        function = spec['function']
        signature = spec['signature']
        self._validate_arguments(resolved_args, signature, function_name)
        return function(self, *resolved_args)

    def _validate_arguments(self, args, signature, function_name):
        """Check the argument count, then the argument types.

        A signature whose last entry is marked ``variadic`` needs at least
        ``len(signature)`` arguments (``VariadictArityError`` otherwise);
        any other signature needs exactly that many (``ArityError``).
        """
        if signature and signature[-1].get('variadic'):
            if len(args) < len(signature):
                raise exceptions.VariadictArityError(
                    len(signature), len(args), function_name)
        elif len(args) != len(signature):
            raise exceptions.ArityError(
                len(signature), len(args), function_name)
        return self._type_check(args, signature, function_name)

    def _type_check(self, actual, signature, function_name):
        """Type-check every argument against its signature entry.

        Arguments beyond the end of the signature only occur for variadic
        functions (arity was validated before this is called); they are
        checked against the last signature entry so that, for example, every
        argument of ``merge`` must be an object and not only the first.
        """
        last = len(signature) - 1
        for position, argument in enumerate(actual):
            allowed_types = signature[min(position, last)]['types']
            # An empty type list means "any type".
            if allowed_types:
                self._type_check_single(argument, allowed_types,
                                        function_name)

    def _type_check_single(self, current, types, function_name):
        """Check one argument's top-level type and, for arrays, its elements.

        Raises ``exceptions.JMESPathTypeError`` on a mismatch.
        """
        allowed_types, allowed_subtypes = self._get_allowed_pytypes(types)
        # We're not using isinstance() on purpose.
        # The type model for jmespath does not map
        # 1-1 with python types (booleans are considered
        # integers in python for example).
        actual_typename = type(current).__name__
        if actual_typename not in allowed_types:
            raise exceptions.JMESPathTypeError(
                function_name, current,
                self._convert_to_jmespath_type(actual_typename), types)
        # If we're dealing with a list type, we can have
        # additional restrictions on the type of the list
        # elements (for example a function can require a
        # list of numbers or a list of strings).
        # Arrays are the only types that can have subtypes.
        if allowed_subtypes:
            self._subtype_check(current, allowed_subtypes,
                                types, function_name)

    def _get_allowed_pytypes(self, types):
        """Translate jmespath type names into Python type names.

        A name of the form ``<type>-<subtype>`` contributes ``<type>`` to
        the allowed top-level types and ``<subtype>`` to the allowed element
        types. Returns ``(allowed_types, allowed_subtypes)``: a flat list of
        Python type names and a list of tuples of Python type names.
        """
        allowed_types = []
        allowed_subtypes = []
        for type_spec in types:
            type_, separator, subtype = type_spec.partition('-')
            if separator:
                allowed_subtypes.append(REVERSE_TYPES_MAP[subtype])
            allowed_types.extend(REVERSE_TYPES_MAP[type_])
        return allowed_types, allowed_subtypes

    def _subtype_check(self, current, allowed_subtypes, types, function_name):
        """Check the element types of the array ``current``.

        With one allowed subtype every element must match it. With several,
        the first element selects the subtype that all elements must match.
        Raises ``exceptions.JMESPathTypeError`` for the first offending
        element; the reported type is a jmespath type name, the same
        convention ``_type_check_single`` uses.
        """
        if len(allowed_subtypes) == 1:
            # The easy case, we know up front what type
            # we need to validate.
            allowed = allowed_subtypes[0]
        elif len(allowed_subtypes) > 1 and current:
            # Dynamic type validation. Based on the first
            # type we see, we validate that the remaining types
            # match.
            first = type(current[0]).__name__
            for subtypes in allowed_subtypes:
                if first in subtypes:
                    allowed = subtypes
                    break
            else:
                raise exceptions.JMESPathTypeError(
                    function_name, current[0],
                    self._convert_to_jmespath_type(first), types)
        else:
            # No subtypes to enforce, or nothing to inspect.
            return
        for element in current:
            actual_typename = type(element).__name__
            if actual_typename not in allowed:
                raise exceptions.JMESPathTypeError(
                    function_name, element,
                    self._convert_to_jmespath_type(actual_typename), types)

    @signature({'types': ['number']})
    def _func_abs(self, arg):
        """Return the absolute value of ``arg``."""
        return abs(arg)

    @signature({'types': ['array-number']})
    def _func_avg(self, arg):
        """Return the mean of ``arg`` as a float, or None when it is empty."""
        if arg:
            return sum(arg) / float(len(arg))
        return None

    @signature({'types': [], 'variadic': True})
    def _func_not_null(self, *arguments):
        """Return the first argument that is not None, or None if all are."""
        for argument in arguments:
            if argument is not None:
                return argument
        return None

    @signature({'types': []})
    def _func_to_array(self, arg):
        """Return ``arg`` itself when it is a list, else ``[arg]``."""
        if isinstance(arg, list):
            return arg
        return [arg]

    @signature({'types': []})
    def _func_to_string(self, arg):
        """Return ``arg`` itself when it is a string, else its compact JSON."""
        if isinstance(arg, STRING_TYPE):
            return arg
        # default=str stringifies values json cannot serialize natively.
        return json.dumps(arg, separators=(',', ':'),
                          default=str)

    @signature({'types': []})
    def _func_to_number(self, arg):
        """Convert ``arg`` to a number, or return None when that is not possible.

        Lists, dicts, booleans and None give None; ints and floats are
        returned as-is; anything else is tried with ``int`` and then
        ``float``.
        """
        if arg is None or isinstance(arg, (list, dict, bool)):
            return None
        if isinstance(arg, (int, float)):
            return arg
        try:
            return int(arg)
        except ValueError:
            try:
                return float(arg)
            except ValueError:
                return None

    @signature({'types': ['array', 'string']}, {'types': []})
    def _func_contains(self, subject, search):
        """Return whether ``search`` is in ``subject``."""
        return search in subject

    @signature({'types': ['string', 'array', 'object']})
    def _func_length(self, arg):
        """Return ``len(arg)``."""
        return len(arg)

    @signature({'types': ['string']}, {'types': ['string']})
    def _func_ends_with(self, search, suffix):
        """Return whether ``search`` ends with ``suffix``."""
        return search.endswith(suffix)

    @signature({'types': ['string']}, {'types': ['string']})
    def _func_starts_with(self, search, suffix):
        """Return whether ``search`` starts with ``suffix``."""
        return search.startswith(suffix)

    @signature({'types': ['array', 'string']})
    def _func_reverse(self, arg):
        """Return a reversed copy of the string or list ``arg``."""
        if isinstance(arg, STRING_TYPE):
            return arg[::-1]
        return list(reversed(arg))

    @signature({"types": ['number']})
    def _func_ceil(self, arg):
        """Return ``math.ceil(arg)``."""
        return math.ceil(arg)

    @signature({"types": ['number']})
    def _func_floor(self, arg):
        """Return ``math.floor(arg)``."""
        return math.floor(arg)

    @signature({"types": ['string']}, {"types": ['array-string']})
    def _func_join(self, separator, array):
        """Join the strings in ``array`` with ``separator``."""
        return separator.join(array)

    @signature({'types': ['expref']}, {'types': ['array']})
    def _func_map(self, expref, arg):
        """Evaluate ``expref`` against each element of ``arg``; return a list."""
        return [expref.visit(expref.expression, element) for element in arg]

    @signature({"types": ['array-number', 'array-string']})
    def _func_max(self, arg):
        """Return the largest element of ``arg``, or None when it is empty."""
        if arg:
            return max(arg)
        return None

    @signature({"types": ["object"], "variadic": True})
    def _func_merge(self, *arguments):
        """Merge the given objects into a new dict; later arguments win."""
        merged = {}
        for arg in arguments:
            merged.update(arg)
        return merged

    @signature({"types": ['array-number', 'array-string']})
    def _func_min(self, arg):
        """Return the smallest element of ``arg``, or None when it is empty."""
        if arg:
            return min(arg)
        return None

    @signature({"types": ['array-string', 'array-number']})
    def _func_sort(self, arg):
        """Return a new sorted list of the elements of ``arg``."""
        return sorted(arg)

    @signature({"types": ['array-number']})
    def _func_sum(self, arg):
        """Return the sum of the numbers in ``arg``."""
        return sum(arg)

    @signature({"types": ['object']})
    def _func_keys(self, arg):
        """Return the keys of ``arg`` as a list."""
        # To be consistent with .values()
        # should we also return the indices of a list?
        return list(arg.keys())

    @signature({"types": ['object']})
    def _func_values(self, arg):
        """Return the values of ``arg`` as a list."""
        return list(arg.values())

    @signature({'types': []})
    def _func_type(self, arg):
        """Return the jmespath type name of ``arg``.

        Returns None for a value matching none of the checks below.
        """
        # bool is tested before the numeric check because a Python bool
        # is also an int.
        if isinstance(arg, STRING_TYPE):
            return "string"
        elif isinstance(arg, bool):
            return "boolean"
        elif isinstance(arg, list):
            return "array"
        elif isinstance(arg, dict):
            return "object"
        elif isinstance(arg, (float, int)):
            return "number"
        elif arg is None:
            return "null"

    @signature({'types': ['array']}, {'types': ['expref']})
    def _func_sort_by(self, array, expref):
        """Return ``array`` sorted by the value ``expref`` gives each element.

        An empty ``array`` is returned as-is. Raises
        ``exceptions.JMESPathTypeError`` when the keys are not all numbers or
        all strings.
        """
        if not array:
            return array
        # sort_by allows for the expref to be either a number or
        # a string, so we have some special logic to handle this.
        # We evaluate the first array element and verify that it's
        # either a string or a number. We then create a key function
        # that validates that type, which requires that remaining array
        # elements resolve to the same type as the first element.
        required_type = self._convert_to_jmespath_type(
            type(expref.visit(expref.expression, array[0])).__name__)
        if required_type not in ['number', 'string']:
            raise exceptions.JMESPathTypeError(
                'sort_by', array[0], required_type, ['string', 'number'])
        keyfunc = self._create_key_func(expref,
                                        [required_type],
                                        'sort_by')
        return sorted(array, key=keyfunc)

    @signature({'types': ['array']}, {'types': ['expref']})
    def _func_min_by(self, array, expref):
        """Return the element with the smallest ``expref`` key, or None if empty."""
        keyfunc = self._create_key_func(expref,
                                        ['number', 'string'],
                                        'min_by')
        if array:
            return min(array, key=keyfunc)
        return None

    @signature({'types': ['array']}, {'types': ['expref']})
    def _func_max_by(self, array, expref):
        """Return the element with the largest ``expref`` key, or None if empty."""
        keyfunc = self._create_key_func(expref,
                                        ['number', 'string'],
                                        'max_by')
        if array:
            return max(array, key=keyfunc)
        return None

    def _create_key_func(self, expref, allowed_types, function_name):
        """Return a key function that evaluates ``expref`` and checks its type.

        The returned function raises ``exceptions.JMESPathTypeError`` when
        the evaluated key's jmespath type is not in ``allowed_types``.
        """
        def keyfunc(x):
            result = expref.visit(expref.expression, x)
            actual_typename = type(result).__name__
            jmespath_type = self._convert_to_jmespath_type(actual_typename)
            # allowed_types is in term of jmespath types, not python types.
            if jmespath_type not in allowed_types:
                raise exceptions.JMESPathTypeError(
                    function_name, result, jmespath_type, allowed_types)
            return result
        return keyfunc

    def _convert_to_jmespath_type(self, pyobject):
        """Map a Python type name to its jmespath name, or ``'unknown'``."""
        return TYPES_MAP.get(pyobject, 'unknown')

View file

@ -0,0 +1,208 @@
import string
import warnings
from json import loads
from jmespath.exceptions import LexerError, EmptyExpressionError
class Lexer(object):
    """Tokenizer for JMESPath expressions.

    ``tokenize`` yields one dict per token with the keys ``type``,
    ``value``, ``start`` and ``end``. ``start`` is the offset of the token's
    first character in the expression and ``end`` is the offset just past
    its last character.
    """
    START_IDENTIFIER = set(string.ascii_letters + '_')
    VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_')
    VALID_NUMBER = set(string.digits)
    WHITESPACE = set(" \t\n\r")
    # Tokens that consist of exactly one character.
    SIMPLE_TOKENS = {
        '.': 'dot',
        '*': 'star',
        ']': 'rbracket',
        ',': 'comma',
        ':': 'colon',
        '@': 'current',
        '(': 'lparen',
        ')': 'rparen',
        '{': 'lbrace',
        '}': 'rbrace',
    }

    def tokenize(self, expression):
        """Yield the tokens of ``expression``, ending with an ``eof`` token.

        Raises ``EmptyExpressionError`` for an empty expression and
        ``LexerError`` for input that cannot be tokenized. Because this is a
        generator, errors surface while iterating, not when it is called.
        """
        self._initialize_for_expression(expression)
        while self._current is not None:
            if self._current in self.SIMPLE_TOKENS:
                yield {'type': self.SIMPLE_TOKENS[self._current],
                       'value': self._current,
                       'start': self._position, 'end': self._position + 1}
                self._next()
            elif self._current in self.START_IDENTIFIER:
                start = self._position
                buff = self._current
                while self._next() in self.VALID_IDENTIFIER:
                    buff += self._current
                yield {'type': 'unquoted_identifier', 'value': buff,
                       'start': start, 'end': start + len(buff)}
            elif self._current in self.WHITESPACE:
                self._next()
            elif self._current == '[':
                start = self._position
                next_char = self._next()
                if next_char == ']':
                    self._next()
                    yield {'type': 'flatten', 'value': '[]',
                           'start': start, 'end': start + 2}
                elif next_char == '?':
                    self._next()
                    yield {'type': 'filter', 'value': '[?',
                           'start': start, 'end': start + 2}
                else:
                    yield {'type': 'lbracket', 'value': '[',
                           'start': start, 'end': start + 1}
            elif self._current == "'":
                yield self._consume_raw_string_literal()
            elif self._current == '|':
                yield self._match_or_else('|', 'or', 'pipe')
            elif self._current == '&':
                yield self._match_or_else('&', 'and', 'expref')
            elif self._current == '`':
                yield self._consume_literal()
            elif self._current in self.VALID_NUMBER:
                start = self._position
                buff = self._consume_number()
                yield {'type': 'number', 'value': int(buff),
                       'start': start, 'end': start + len(buff)}
            elif self._current == '-':
                # Negative number.
                start = self._position
                buff = self._consume_number()
                if len(buff) > 1:
                    yield {'type': 'number', 'value': int(buff),
                           'start': start, 'end': start + len(buff)}
                else:
                    # A lone '-' with no digits after it.
                    raise LexerError(lexer_position=start,
                                     lexer_value=buff,
                                     message="Unknown token '%s'" % buff)
            elif self._current == '"':
                yield self._consume_quoted_identifier()
            elif self._current == '<':
                yield self._match_or_else('=', 'lte', 'lt')
            elif self._current == '>':
                yield self._match_or_else('=', 'gte', 'gt')
            elif self._current == '!':
                yield self._match_or_else('=', 'ne', 'not')
            elif self._current == '=':
                if self._next() == '=':
                    # self._position now points at the second '='.
                    yield {'type': 'eq', 'value': '==',
                           'start': self._position - 1,
                           'end': self._position + 1}
                    self._next()
                else:
                    if self._current is None:
                        # If we're at the EOF, we never advanced
                        # the position so we don't need to rewind
                        # it back one location.
                        position = self._position
                    else:
                        position = self._position - 1
                    raise LexerError(
                        lexer_position=position,
                        lexer_value='=',
                        message="Unknown token '='")
            else:
                raise LexerError(lexer_position=self._position,
                                 lexer_value=self._current,
                                 message="Unknown token %s" % self._current)
        yield {'type': 'eof', 'value': '',
               'start': self._length, 'end': self._length}

    def _consume_number(self):
        """Return the current character plus the run of digits following it."""
        buff = self._current
        while self._next() in self.VALID_NUMBER:
            buff += self._current
        return buff

    def _initialize_for_expression(self, expression):
        """Reset the lexer state to the first character of ``expression``.

        Raises ``EmptyExpressionError`` when ``expression`` is empty.
        """
        if not expression:
            raise EmptyExpressionError()
        self._position = 0
        self._expression = expression
        self._chars = list(self._expression)
        self._current = self._chars[self._position]
        self._length = len(self._expression)

    def _next(self):
        """Advance one character and return it, or None at end of input.

        At the last character the position is NOT advanced; only the
        current character becomes None.
        """
        if self._position == self._length - 1:
            self._current = None
        else:
            self._position += 1
            self._current = self._chars[self._position]
        return self._current

    def _consume_until(self, delimiter):
        """Return the raw text between the current and the closing delimiter.

        The delimiter can be escaped with a backslash; backslashes are kept
        in the returned text, so its length equals the number of source
        characters between the two delimiters. Raises ``LexerError`` when
        the input ends before the closing delimiter.
        """
        # Consume until the delimiter is reached,
        # allowing for the delimiter to be escaped with "\".
        start = self._position
        buff = ''
        self._next()
        while self._current != delimiter:
            if self._current == '\\':
                buff += '\\'
                self._next()
            if self._current is None:
                # We're at the EOF.
                raise LexerError(lexer_position=start,
                                 lexer_value=self._expression[start:],
                                 message="Unclosed %s delimiter" % delimiter)
            buff += self._current
            self._next()
        # Skip the closing delimiter.
        self._next()
        return buff

    def _consume_literal(self):
        """Consume a backtick-delimited JSON literal and return its token.

        Text that is not valid JSON is retried as a quoted JSON string (with
        a ``PendingDeprecationWarning``); ``LexerError`` is raised when that
        fails too.
        """
        start = self._position
        raw = self._consume_until('`')
        lexeme = raw.replace('\\`', '`')
        try:
            # Assume it is valid JSON and attempt to parse.
            parsed_json = loads(lexeme)
        except ValueError:
            try:
                # Invalid JSON values should be converted to quoted
                # JSON strings during the JEP-12 deprecation period.
                parsed_json = loads('"%s"' % lexeme.lstrip())
                warnings.warn("deprecated string literal syntax",
                              PendingDeprecationWarning)
            except ValueError:
                raise LexerError(lexer_position=start,
                                 lexer_value=self._expression[start:],
                                 message="Bad token %s" % lexeme)
        # The end offset is derived from the raw text (plus the two
        # backticks) because self._position stops advancing at end of input.
        return {'type': 'literal', 'value': parsed_json,
                'start': start, 'end': start + len(raw) + 2}

    def _consume_quoted_identifier(self):
        """Consume a double-quoted identifier and return its token.

        The quoted text is decoded as a JSON string; ``LexerError`` is
        raised when it is not valid JSON.
        """
        start = self._position
        lexeme = '"' + self._consume_until('"') + '"'
        try:
            # lexeme holds the raw source text including both quotes, so
            # its length is the token's length in the expression.
            return {'type': 'quoted_identifier', 'value': loads(lexeme),
                    'start': start, 'end': start + len(lexeme)}
        except ValueError as e:
            error_message = str(e).split(':')[0]
            raise LexerError(lexer_position=start,
                             lexer_value=lexeme,
                             message=error_message)

    def _consume_raw_string_literal(self):
        """Consume a single-quoted raw string and return a ``literal`` token."""
        start = self._position
        raw = self._consume_until("'")
        lexeme = raw.replace("\\'", "'")
        # Raw text plus the two quotes gives the token's source length.
        return {'type': 'literal', 'value': lexeme,
                'start': start, 'end': start + len(raw) + 2}

    def _match_or_else(self, expected, match_type, else_type):
        """Return a one- or two-character operator token.

        When the character after the current one equals ``expected`` both
        are consumed and a ``match_type`` token is returned; otherwise only
        the current character is consumed and an ``else_type`` token is
        returned.
        """
        start = self._position
        current = self._current
        next_char = self._next()
        if next_char == expected:
            self._next()
            return {'type': match_type, 'value': current + next_char,
                    'start': start, 'end': start + 2}
        return {'type': else_type, 'value': current,
                'start': start, 'end': start + 1}

View file

@ -0,0 +1,527 @@
"""Top down operator precedence parser.
This is an implementation of Vaughan R. Pratt's
"Top Down Operator Precedence" parser.
(http://dl.acm.org/citation.cfm?doid=512927.512931).
These are some additional resources that help explain the
general idea behind a Pratt parser:
* http://effbot.org/zone/simple-top-down-parsing.htm
* http://javascript.crockford.com/tdop/tdop.html
A few notes on the implementation.
* All the nud/led tokens are on the Parser class itself, and are dispatched
using getattr(). This keeps all the parsing logic contained to a single
class.
* We use two passes through the data. One to create a list of tokens,
then one pass through the tokens to create the AST. While the lexer actually
yields tokens, we convert it to a list so we can easily implement two tokens
of lookahead. A previous implementation used a fixed circular buffer, but it
was significantly slower. Also, the average jmespath expression typically
does not have a large number of tokens, so this is not an issue. And
interestingly enough, creating a token list first is actually faster than
consuming from the token iterator one token at a time.
"""
import random
from jmespath import lexer
from jmespath.compat import with_repr_method
from jmespath import ast
from jmespath import exceptions
from jmespath import visitor
class Parser(object):
# Binding power of each token type, looked up by the expression loop.
BINDING_POWER = {
    # Tokens with zero binding power.
    'eof': 0,
    'unquoted_identifier': 0,
    'quoted_identifier': 0,
    'literal': 0,
    'rbracket': 0,
    'rparen': 0,
    'comma': 0,
    'rbrace': 0,
    'number': 0,
    'current': 0,
    'expref': 0,
    'colon': 0,
    # Pipe and boolean operators.
    'pipe': 1,
    'or': 2,
    'and': 3,
    # Comparison operators all share one level.
    'eq': 5,
    'gt': 5,
    'lt': 5,
    'gte': 5,
    'lte': 5,
    'ne': 5,
    'flatten': 9,
    # Everything above stops a projection.
    'star': 20,
    'filter': 21,
    'dot': 40,
    'not': 45,
    'lbrace': 50,
    'lbracket': 55,
    'lparen': 60,
}
# The maximum binding power for a token that can stop
# a projection.
_PROJECTION_STOP = 10
# The _MAX_SIZE most recent expressions are cached in
# _CACHE dict.
_CACHE = {}
_MAX_SIZE = 128
def __init__(self, lookahead=2):
    """Set up empty parser state.

    ``lookahead`` sizes the initial placeholder token list and is stored
    as the buffer size.
    """
    self._index = 0
    self._buffer_size = lookahead
    self._tokens = [None] * lookahead
    self.tokenizer = None
def parse(self, expression):
    """Return the parsed form of ``expression``, using the shared cache.

    A cache miss parses the expression, stores the result, and asks for
    cache entries to be freed once the cache holds more than ``_MAX_SIZE``
    entries.
    """
    result = self._CACHE.get(expression)
    if result is None:
        result = self._do_parse(expression)
        self._CACHE[expression] = result
        if len(self._CACHE) > self._MAX_SIZE:
            self._free_cache_entries()
    return result
def _do_parse(self, expression):
    """Parse ``expression``, attaching it to any parse error before re-raising.

    An ``IncompleteExpressionError`` gets the expression through its
    ``set_expression`` method; lexer errors and all other parse errors get
    it assigned to their ``expression`` attribute.
    """
    try:
        return self._parse(expression)
    except exceptions.ParseError as error:
        # LexerError and IncompleteExpressionError both derive from
        # ParseError; a LexerError takes the plain-assignment path.
        incomplete = isinstance(error, exceptions.IncompleteExpressionError)
        if incomplete and not isinstance(error, exceptions.LexerError):
            error.set_expression(expression)
        else:
            error.expression = expression
        raise
def _parse(self, expression):
    """Tokenize and parse ``expression`` into a ``ParsedResult``.

    Raises ``exceptions.ParseError`` when tokens remain after the top-level
    expression has been parsed.
    """
    self.tokenizer = lexer.Lexer().tokenize(expression)
    # Materialize every token up front so lookahead is a list index.
    self._tokens = list(self.tokenizer)
    self._index = 0
    parsed = self._expression(binding_power=0)
    if self._current_token() != 'eof':
        leftover = self._lookahead_token(0)
        raise exceptions.ParseError(
            leftover['start'], leftover['value'], leftover['type'],
            "Unexpected token: %s" % leftover['value'])
    return ParsedResult(expression, parsed)
def _expression(self, binding_power=0):
left_token = self._lookahead_token(0)
self._advance()
nud_function = getattr(
self, '_token_nud_%s' % left_token['type'],
self._error_nud_token)
left = nud_function(left_token)
current_token = self._current_token()
while binding_power < self.BINDING_POWER[current_token]:
led = getattr(self, '_token_led_%s' % current_token, None)
if led is None:
error_token = self._lookahead_token(0)
self._error_led_token(error_token)
else:
self._advance()
left = led(left)
current_token = self._current_token()
return left
def _token_nud_literal(self, token):
    """Turn a literal token into a literal AST node."""
    value = token['value']
    return ast.literal(value)
def _token_nud_unquoted_identifier(self, token):
    """Turn an unquoted identifier token into a field AST node."""
    name = token['value']
    return ast.field(name)
def _token_nud_quoted_identifier(self, token):
    """Turn a quoted identifier token into a field AST node.

    Raises ``exceptions.ParseError`` when the identifier is immediately
    followed by ``(``, because a quoted identifier cannot be used as a
    function name. The error carries the position of that ``(`` token, so
    the caret in the rendered message points at it instead of column 0.
    """
    field = ast.field(token['value'])
    # You can't have a quoted identifier as a function
    # name.
    if self._current_token() == 'lparen':
        t = self._lookahead_token(0)
        raise exceptions.ParseError(
            t['start'], t['value'], t['type'],
            'Quoted identifier not allowed for function names.')
    return field
def _token_nud_star(self, token):
    """Parse a leading ``*`` into a value projection over the identity node."""
    left = ast.identity()
    if self._current_token() != 'rbracket':
        right = self._parse_projection_rhs(self.BINDING_POWER['star'])
    else:
        # A ']' directly after the star: the right side is the identity.
        right = ast.identity()
    return ast.value_projection(left, right)
def _token_nud_filter(self, token):
    """Parse a leading filter by applying it to the identity node."""
    implicit_left = ast.identity()
    return self._token_led_filter(implicit_left)
def _token_nud_lbrace(self, token):
return self._parse_multi_select_hash()
def _token_nud_lparen(self, token):
expression = self._expression()
self._match('rparen')
return expression
def _token_nud_flatten(self, token):
    """Parse a leading ``[]`` as a projection over the flattened identity."""
    flattened = ast.flatten(ast.identity())
    flatten_power = self.BINDING_POWER['flatten']
    rhs = self._parse_projection_rhs(flatten_power)
    return ast.projection(flattened, rhs)
def _token_nud_not(self, token):
    """Parse the operand of ``!`` and wrap it in a not-expression node."""
    not_power = self.BINDING_POWER['not']
    operand = self._expression(not_power)
    return ast.not_expression(operand)
def _token_nud_lbracket(self, token):
    """Parse a leading ``[``: an index/slice, ``[*]``, or a multi-select list."""
    if self._current_token() in ['number', 'colon']:
        index_node = self._parse_index_expression()
        # We could optimize this and remove the identity() node.
        # We don't really need an index_expression node, we can
        # just emit an index node here if we're not dealing
        # with a slice.
        return self._project_if_slice(ast.identity(), index_node)
    if self._current_token() == 'star' and \
            self._lookahead(1) == 'rbracket':
        # Step over both the '*' and the ']'.
        self._advance()
        self._advance()
        rhs = self._parse_projection_rhs(self.BINDING_POWER['star'])
        return ast.projection(ast.identity(), rhs)
    return self._parse_multi_select_list()
def _parse_index_expression(self):
# We're here:
# [<current>
# ^
# | current token
if (self._lookahead(0) == 'colon' or
self._lookahead(1) == 'colon'):
return self._parse_slice_expression()
else:
# Parse the syntax [number]
node = ast.index(self._lookahead_token(0)['value'])
self._advance()
self._match('rbracket')
return node
def _parse_slice_expression(self):
    """Parse ``[start:end:step]`` into a slice AST node.

    start, end and step are each optional, and so is the last colon.
    Missing parts are passed to the slice node as None.
    """
    parts = [None, None, None]
    slot = 0
    token_type = self._current_token()
    while token_type != 'rbracket' and slot < 3:
        if token_type == 'colon':
            # Each colon moves on to the next slice part.
            slot += 1
            if slot == 3:
                self._raise_parse_error_for_token(
                    self._lookahead_token(0), 'syntax error')
            self._advance()
        elif token_type == 'number':
            parts[slot] = self._lookahead_token(0)['value']
            self._advance()
        else:
            self._raise_parse_error_for_token(
                self._lookahead_token(0), 'syntax error')
        token_type = self._current_token()
    self._match('rbracket')
    start, end, step = parts
    return ast.slice(start, end, step)
def _token_nud_current(self, token):
    """Turn the ``@`` token into a current-node AST node."""
    node = ast.current_node()
    return node
def _token_nud_expref(self, token):
    """Parse the expression after ``&`` and wrap it in an expref node."""
    expref_power = self.BINDING_POWER['expref']
    referenced = self._expression(expref_power)
    return ast.expref(referenced)
def _token_led_dot(self, left):
    """Parse what follows a ``.``: a value projection or a subexpression."""
    if self._current_token() == 'star':
        # We're creating a projection.
        self._advance()
        rhs = self._parse_projection_rhs(self.BINDING_POWER['dot'])
        return ast.value_projection(left, rhs)
    rhs = self._parse_dot_rhs(self.BINDING_POWER['dot'])
    if left['type'] != 'subexpression':
        return ast.subexpression([left, rhs])
    # Extend the existing subexpression in place instead of nesting.
    left['children'].append(rhs)
    return left
def _token_led_pipe(self, left):
    """Parse the right side of ``|`` and build a pipe node."""
    pipe_power = self.BINDING_POWER['pipe']
    rhs = self._expression(pipe_power)
    return ast.pipe(left, rhs)
def _token_led_or(self, left):
    """Parse the right side of ``||`` and build an or-expression node."""
    or_power = self.BINDING_POWER['or']
    rhs = self._expression(or_power)
    return ast.or_expression(left, rhs)
def _token_led_and(self, left):
    """Parse the right side of ``&&`` and build an and-expression node."""
    and_power = self.BINDING_POWER['and']
    rhs = self._expression(and_power)
    return ast.and_expression(left, rhs)
def _token_led_lparen(self, left):
    """Parse a function call whose name is the field node ``left``.

    Raises ``exceptions.ParseError`` when ``left`` is not a field node.
    """
    if left['type'] != 'field':
        # 0 - first func arg or closing paren.
        # -1 - '(' token
        # -2 - invalid function "name".
        bad_name = self._lookahead_token(-2)
        raise exceptions.ParseError(
            bad_name['start'], bad_name['value'], bad_name['type'],
            "Invalid function name '%s'" % bad_name['value'])
    function_name = left['value']
    arguments = []
    while self._current_token() != 'rparen':
        argument = self._expression()
        # A comma after an argument is consumed when present.
        if self._current_token() == 'comma':
            self._match('comma')
        arguments.append(argument)
    self._match('rparen')
    return ast.function_expression(function_name, arguments)
def _token_led_filter(self, left):
    """Parse a ``[? ... ]`` filter applied to ``left``."""
    # Filters are projections.
    condition = self._expression(0)
    self._match('rbracket')
    if self._current_token() != 'flatten':
        rhs = self._parse_projection_rhs(self.BINDING_POWER['filter'])
    else:
        # A flatten directly after the filter: the right side is the identity.
        rhs = ast.identity()
    return ast.filter_projection(left, rhs, condition)
def _token_led_eq(self, left):
return self._parse_comparator(left, 'eq')
def _token_led_ne(self, left):
return self._parse_comparator(left, 'ne')
def _token_led_gt(self, left):
return self._parse_comparator(left, 'gt')
def _token_led_gte(self, left):
return self._parse_comparator(left, 'gte')
def _token_led_lt(self, left):
return self._parse_comparator(left, 'lt')
def _token_led_lte(self, left):
return self._parse_comparator(left, 'lte')
def _token_led_flatten(self, left):
    """Wrap ``left`` in a flatten node and project what follows over it."""
    flattened = ast.flatten(left)
    flatten_power = self.BINDING_POWER['flatten']
    projected = self._parse_projection_rhs(flatten_power)
    return ast.projection(flattened, projected)
def _token_led_lbracket(self, left):
token = self._lookahead_token(0)
if token['type'] in ['number', 'colon']:
right = self._parse_index_expression()
if left['type'] == 'index_expression':
# Optimization: if the left node is an index expr,
# we can avoid creating another node and instead just add
# the right node as a child of the left.
left['children'].append(right)
return left
else:
return self._project_if_slice(left, right)
else:
# We have a projection
self._match('star')
self._match('rbracket')
right = self._parse_projection_rhs(self.BINDING_POWER['star'])
return ast.projection(left, right)
def _project_if_slice(self, left, right):
    """Combine ``left`` and ``right`` into an index expression.

    When ``right`` is a slice node the index expression becomes the
    left side of a projection; otherwise it is returned as is.
    """
    combined = ast.index_expression([left, right])
    if right['type'] != 'slice':
        return combined
    projected = self._parse_projection_rhs(self.BINDING_POWER['star'])
    return ast.projection(combined, projected)
def _parse_comparator(self, left, comparator):
    """Parse the right operand of ``comparator`` and build its AST node.

    ``comparator`` is both the key into ``BINDING_POWER`` and the
    operator name stored on the resulting node.
    """
    operand_power = self.BINDING_POWER[comparator]
    right_operand = self._expression(operand_power)
    return ast.comparator(comparator, left, right_operand)
def _parse_multi_select_list(self):
    """Parse comma-separated expressions up to ``]`` into a multi-select list."""
    # At least one expression is always parsed.
    items = [self._expression()]
    while self._current_token() != 'rbracket':
        self._match('comma')
        items.append(self._expression())
    self._match('rbracket')
    return ast.multi_select_list(items)
def _parse_multi_select_hash(self):
    """Parse ``key: expression`` pairs up to ``}`` into a multi-select dict."""
    entries = []
    while True:
        name_token = self._lookahead_token(0)
        # Confirm the token really is an identifier before its value
        # is used as the key name.
        self._match_multiple_tokens(
            token_types=['quoted_identifier', 'unquoted_identifier'])
        key_name = name_token['value']
        self._match('colon')
        entries.append(
            ast.key_val_pair(key_name=key_name, node=self._expression(0)))
        separator = self._current_token()
        if separator == 'comma':
            self._match('comma')
        elif separator == 'rbrace':
            self._match('rbrace')
            return ast.multi_select_dict(nodes=entries)
def _parse_projection_rhs(self, binding_power):
# Parse the right hand side of the projection.
if self.BINDING_POWER[self._current_token()] < self._PROJECTION_STOP:
# BP of 10 are all the tokens that stop a projection.
right = ast.identity()
elif self._current_token() == 'lbracket':
right = self._expression(binding_power)
elif self._current_token() == 'filter':
right = self._expression(binding_power)
elif self._current_token() == 'dot':
self._match('dot')
right = self._parse_dot_rhs(binding_power)
else:
self._raise_parse_error_for_token(self._lookahead_token(0),
'syntax error')
return right
def _parse_dot_rhs(self, binding_power):
# From the grammar:
# expression '.' ( identifier /
# multi-select-list /
# multi-select-hash /
# function-expression /
# *
# In terms of tokens that means that after a '.',
# you can have:
lookahead = self._current_token()
# Common case "foo.bar", so first check for an identifier.
if lookahead in ['quoted_identifier', 'unquoted_identifier', 'star']:
return self._expression(binding_power)
elif lookahead == 'lbracket':
self._match('lbracket')
return self._parse_multi_select_list()
elif lookahead == 'lbrace':
self._match('lbrace')
return self._parse_multi_select_hash()
else:
t = self._lookahead_token(0)
allowed = ['quoted_identifier', 'unquoted_identifier',
'lbracket', 'lbrace']
msg = (
"Expecting: %s, got: %s" % (allowed, t['type'])
)
self._raise_parse_error_for_token(t, msg)
def _error_nud_token(self, token):
if token['type'] == 'eof':
raise exceptions.IncompleteExpressionError(
token['start'], token['value'], token['type'])
self._raise_parse_error_for_token(token, 'invalid token')
def _error_led_token(self, token):
self._raise_parse_error_for_token(token, 'invalid token')
def _match(self, token_type=None):
# inline'd self._current_token()
if self._current_token() == token_type:
# inline'd self._advance()
self._advance()
else:
self._raise_parse_error_maybe_eof(
token_type, self._lookahead_token(0))
def _match_multiple_tokens(self, token_types):
if self._current_token() not in token_types:
self._raise_parse_error_maybe_eof(
token_types, self._lookahead_token(0))
self._advance()
def _advance(self):
self._index += 1
def _current_token(self):
return self._tokens[self._index]['type']
def _lookahead(self, number):
return self._tokens[self._index + number]['type']
def _lookahead_token(self, number):
return self._tokens[self._index + number]
def _raise_parse_error_for_token(self, token, reason):
    """Raise ``exceptions.ParseError`` for ``token`` with ``reason`` as message.

    The token's ``start``, ``value`` and ``type`` entries are passed to
    the exception, in that order.
    """
    raise exceptions.ParseError(
        token['start'], token['value'], token['type'], reason)
def _raise_parse_error_maybe_eof(self, expected_type, token):
    """Raise the parse error for an unexpected ``token``.

    An ``eof`` token raises ``exceptions.IncompleteExpressionError``.
    Any other token raises ``exceptions.ParseError`` with a message
    naming the expected and actual token types.
    """
    position, value, kind = token['start'], token['value'], token['type']
    if kind != 'eof':
        message = 'Expecting: %s, got: %s' % (expected_type, kind)
        raise exceptions.ParseError(position, value, kind, message)
    raise exceptions.IncompleteExpressionError(position, value, kind)
def _free_cache_entries(self):
for key in random.sample(self._CACHE.keys(), int(self._MAX_SIZE / 2)):
self._CACHE.pop(key, None)
@classmethod
def purge(cls):
    """Empty the expression compilation cache held on the class."""
    cache = cls._CACHE
    cache.clear()
@with_repr_method
class ParsedResult(object):
    """Holds an expression string together with its parsed AST."""

    def __init__(self, expression, parsed):
        self.parsed = parsed
        self.expression = expression

    def search(self, value, options=None):
        """Evaluate the parsed AST against ``value``.

        A new ``visitor.TreeInterpreter`` is created from ``options``
        for every call.
        """
        return visitor.TreeInterpreter(options).visit(self.parsed, value)

    def _render_dot_file(self):
        """Return the parsed AST rendered as dot-file text.

        This is internal on purpose: the AST is an implementation
        detail that may change. The method helps with troubleshooting
        and development, but it is not part of the supported public
        API. Use at your own risk.
        """
        return visitor.GraphvizVisitor().visit(self.parsed)

    def __repr__(self):
        return repr(self.parsed)

View file

@ -0,0 +1,328 @@
import operator
from jmespath import functions
from jmespath.compat import string_type
from numbers import Number
def _equals(x, y):
    """Compare with ``==``, except that special int/bool pairs are never equal."""
    # The short-circuit yields False for the special case and the
    # plain ``x == y`` result otherwise.
    return not _is_special_integer_case(x, y) and x == y
def _is_special_integer_case(x, y):
# We need to special case comparing 0 or 1 to
# True/False. While normally comparing any
# integer other than 0/1 to True/False will always
# return False. However 0/1 have this:
# >>> 0 == True
# False
# >>> 0 == False
# True
# >>> 1 == True
# True
# >>> 1 == False
# False
#
# Also need to consider that:
# >>> 0 in [True, False]
# True
if type(x) is int and (x == 0 or x == 1):
return y is True or y is False
elif type(y) is int and (y == 0 or y == 1):
return x is True or x is False
def _is_comparable(x):
    """Return True when ``x`` is a string or an actual (non-boolean) number."""
    # Strings are not officially covered by the spec yet. They were
    # added back because enough users rely on comparing them, and
    # this should eventually become part of the official spec.
    if isinstance(x, string_type):
        return True
    return _is_actual_number(x)
def _is_actual_number(x):
# We need to handle python's quirkiness with booleans,
# specifically:
#
# >>> isinstance(False, int)
# True
# >>> isinstance(True, int)
# True
if x is True or x is False:
return False
return isinstance(x, Number)
class Options(object):
    """Settings that control how a JMESPath expression is evaluated.

    dict_cls: the class used when evaluation has to create a
        dictionary, for example for a multi-select hash. A plain dict
        is used by default. The usual reason to change it is to pass
        collections.OrderedDict for predictable key ordering.
    custom_functions: stored unchanged on the instance.
    """

    def __init__(self, dict_cls=None, custom_functions=None):
        self.custom_functions = custom_functions
        self.dict_cls = dict_cls
class _Expression(object):
def __init__(self, expression, interpreter):
self.expression = expression
self.interpreter = interpreter
def visit(self, node, *args, **kwargs):
return self.interpreter.visit(node, *args, **kwargs)
class Visitor(object):
    """Dispatches nodes to ``visit_<type>`` methods, caching each lookup."""

    def __init__(self):
        # Maps a node type to the bound method that handles it.
        self._method_cache = {}

    def visit(self, node, *args, **kwargs):
        """Call the handler for ``node['type']`` and return its result.

        Node types without a ``visit_<type>`` method are handled by
        ``default_visit``.
        """
        kind = node['type']
        try:
            handler = self._method_cache[kind]
        except KeyError:
            handler = getattr(self, 'visit_%s' % kind, self.default_visit)
            self._method_cache[kind] = handler
        return handler(node, *args, **kwargs)

    def default_visit(self, node, *args, **kwargs):
        """Fallback handler; always raises NotImplementedError."""
        raise NotImplementedError("default_visit")
class TreeInterpreter(Visitor):
    """Evaluates a parsed AST against a Python value.

    Every ``visit_<type>`` method takes an AST node (a dict with
    ``type``, ``children`` and, for some node types, ``value``) and
    the value being searched, and returns that node's result.
    """

    # Maps the ``value`` of a comparator node to its implementation.
    COMPARATOR_FUNC = {
        'eq': _equals,
        'ne': lambda x, y: not _equals(x, y),
        'lt': operator.lt,
        'gt': operator.gt,
        'lte': operator.le,
        'gte': operator.ge
    }
    _EQUALITY_OPS = ['eq', 'ne']
    # Dictionary class used unless ``Options.dict_cls`` overrides it.
    MAP_TYPE = dict

    def __init__(self, options=None):
        """Set up the interpreter from ``options``.

        ``options.dict_cls`` and ``options.custom_functions`` replace
        the defaults when they are not None. A default ``Options()``
        is used when ``options`` is None.
        """
        super(TreeInterpreter, self).__init__()
        self._dict_cls = self.MAP_TYPE
        if options is None:
            options = Options()
        self._options = options
        if options.dict_cls is not None:
            self._dict_cls = self._options.dict_cls
        if options.custom_functions is not None:
            self._functions = self._options.custom_functions
        else:
            self._functions = functions.Functions()

    def default_visit(self, node, *args, **kwargs):
        """Raise NotImplementedError naming the unhandled node type."""
        raise NotImplementedError(node['type'])

    def visit_subexpression(self, node, value):
        """Feed ``value`` through each child in turn."""
        result = value
        for child in node['children']:
            result = self.visit(child, result)
        return result

    def visit_field(self, node, value):
        """Look up the field name with ``value.get``.

        Returns None when ``value`` has no ``get`` attribute.
        """
        try:
            return value.get(node['value'])
        except AttributeError:
            return None

    def visit_comparator(self, node, value):
        """Evaluate both operands and apply the comparator.

        Ordering comparators return None when either operand is not
        comparable, or when the two operands cannot be ordered against
        each other.
        """
        comparator_func = self.COMPARATOR_FUNC[node['value']]
        left = self.visit(node['children'][0], value)
        right = self.visit(node['children'][1], value)
        # Common case: comparator is == or !=
        if node['value'] in self._EQUALITY_OPS:
            return comparator_func(left, right)
        # Ordering operators are only valid for comparable values.
        # Evaluating any other type with a comparison operator
        # yields a None value.
        if not (_is_comparable(left) and _is_comparable(right)):
            return None
        try:
            return comparator_func(left, right)
        except TypeError:
            # Two comparable values of different kinds, such as a
            # number and a string, cannot be ordered on Python 3.
            return None

    def visit_current(self, node, value):
        """Return ``value`` unchanged."""
        return value

    def visit_expref(self, node, value):
        """Return the child expression wrapped for later evaluation."""
        return _Expression(node['children'][0], self)

    def visit_function_expression(self, node, value):
        """Evaluate every argument, then call the named function."""
        resolved_args = [self.visit(child, value)
                         for child in node['children']]
        return self._functions.call_function(node['value'], resolved_args)

    def visit_filter_projection(self, node, value):
        """Project the right-hand side over elements passing the condition.

        Returns None when the left-hand side is not a list. Results
        that are None are left out.
        """
        base = self.visit(node['children'][0], value)
        if not isinstance(base, list):
            return None
        comparator_node = node['children'][2]
        collected = []
        for element in base:
            if self._is_true(self.visit(comparator_node, element)):
                current = self.visit(node['children'][1], element)
                if current is not None:
                    collected.append(current)
        return collected

    def visit_flatten(self, node, value):
        """Merge one level of nested lists into a single list."""
        base = self.visit(node['children'][0], value)
        if not isinstance(base, list):
            # Can't flatten the object if it's not a list.
            return None
        merged_list = []
        for element in base:
            if isinstance(element, list):
                merged_list.extend(element)
            else:
                merged_list.append(element)
        return merged_list

    def visit_identity(self, node, value):
        """Return ``value`` unchanged."""
        return value

    def visit_index(self, node, value):
        """Return ``value[index]``, or None for non-lists and bad indexes."""
        # Even though we can index strings, we don't
        # want to support that.
        if not isinstance(value, list):
            return None
        try:
            return value[node['value']]
        except IndexError:
            return None

    def visit_index_expression(self, node, value):
        """Feed ``value`` through each child in turn."""
        result = value
        for child in node['children']:
            result = self.visit(child, result)
        return result

    def visit_slice(self, node, value):
        """Slice a list using the node's children as slice arguments."""
        if not isinstance(value, list):
            return None
        s = slice(*node['children'])
        return value[s]

    def visit_key_val_pair(self, node, value):
        """Evaluate the pair's value expression."""
        return self.visit(node['children'][0], value)

    def visit_literal(self, node, value):
        """Return the literal stored on the node."""
        return node['value']

    def visit_multi_select_dict(self, node, value):
        """Build a dictionary from the key/value children.

        Returns None when ``value`` is None.
        """
        if value is None:
            return None
        collected = self._dict_cls()
        for child in node['children']:
            collected[child['value']] = self.visit(child, value)
        return collected

    def visit_multi_select_list(self, node, value):
        """Build a list with one result per child.

        Returns None when ``value`` is None.
        """
        if value is None:
            return None
        return [self.visit(child, value) for child in node['children']]

    def visit_or_expression(self, node, value):
        """Return the left result unless it is false, else the right."""
        matched = self.visit(node['children'][0], value)
        if self._is_false(matched):
            matched = self.visit(node['children'][1], value)
        return matched

    def visit_and_expression(self, node, value):
        """Return the left result when it is false, else the right."""
        matched = self.visit(node['children'][0], value)
        if self._is_false(matched):
            return matched
        return self.visit(node['children'][1], value)

    def visit_not_expression(self, node, value):
        """Negate the child's result; a numeric zero negates to False."""
        original_result = self.visit(node['children'][0], value)
        if _is_actual_number(original_result) and original_result == 0:
            # Special case for 0, !0 should be false, not true.
            # Zero is not a false value in jmespath, and that holds
            # for the float 0.0 as much as for the int 0.
            return False
        return not original_result

    def visit_pipe(self, node, value):
        """Feed ``value`` through each child in turn."""
        result = value
        for child in node['children']:
            result = self.visit(child, result)
        return result

    def visit_projection(self, node, value):
        """Project the right-hand side over each element of a list.

        Returns None when the left-hand side is not a list.
        """
        base = self.visit(node['children'][0], value)
        if not isinstance(base, list):
            return None
        return self._collect_non_null(node['children'][1], base)

    def visit_value_projection(self, node, value):
        """Project the right-hand side over the left side's ``values()``.

        Returns None when the left-hand side has no ``values`` attribute.
        """
        base = self.visit(node['children'][0], value)
        try:
            base = base.values()
        except AttributeError:
            return None
        return self._collect_non_null(node['children'][1], base)

    def _collect_non_null(self, node, elements):
        """Evaluate ``node`` against each element, dropping None results."""
        collected = []
        for element in elements:
            current = self.visit(node, element)
            if current is not None:
                collected.append(current)
        return collected

    def _is_false(self, value):
        """Return True when ``value`` counts as false in jmespath."""
        # This looks weird, but we're explicitly using equality checks
        # because the truth/false values are different between
        # python and jmespath.
        return (value == '' or value == [] or value == {} or value is None or
                value is False)

    def _is_true(self, value):
        """Return the opposite of ``_is_false``."""
        return not self._is_false(value)
class GraphvizVisitor(Visitor):
    """Renders an AST as the text of a Graphviz ``digraph``."""

    def __init__(self):
        super(GraphvizVisitor, self).__init__()
        # Number appended to the next node name to keep names unique.
        self._count = 1
        # Output lines collected so far.
        self._lines = []

    def visit(self, node, *args, **kwargs):
        """Return the digraph text for the tree rooted at ``node``."""
        self._lines.append('digraph AST {')
        root_name = '%s%s' % (node['type'], self._count)
        self._count += 1
        self._visit(node, root_name)
        self._lines.append('}')
        return '\n'.join(self._lines)

    def _visit(self, node, current):
        """Emit the label for ``node``, then an edge and subtree per child."""
        label = '%s [label="%s(%s)"]' % (
            current, node['type'], node.get('value', ''))
        self._lines.append(label)
        for child in node.get('children', []):
            child_name = '%s%s' % (child['type'], self._count)
            self._count += 1
            edge = '  %s -> %s' % (current, child_name)
            self._lines.append(edge)
            self._visit(child, child_name)