Output of the new DB entries

hubobel 2022-01-02 21:50:48 +01:00
parent bad48e1627
commit cfbbb9ee3d
2399 changed files with 843193 additions and 43 deletions


@@ -0,0 +1,443 @@
from collections import OrderedDict
import six
class ASTNode(object):
def __init__(self, path, lineno, lexpos):
"""
Args:
lineno (int): The line number where the start of this element
occurs.
lexpos (int): The character offset into the file where this element
occurs.
"""
self.path = path
self.lineno = lineno
self.lexpos = lexpos
class AstNamespace(ASTNode):
def __init__(self, path, lineno, lexpos, name, doc):
"""
Args:
name (str): The namespace of the spec.
doc (Optional[str]): The docstring for this namespace.
"""
super(AstNamespace, self).__init__(path, lineno, lexpos)
self.name = name
self.doc = doc
def __str__(self):
return self.__repr__()
def __repr__(self):
return 'AstNamespace({!r})'.format(self.name)
class AstImport(ASTNode):
def __init__(self, path, lineno, lexpos, target):
"""
Args:
target (str): The name of the namespace to import.
"""
super(AstImport, self).__init__(path, lineno, lexpos)
self.target = target
def __str__(self):
return self.__repr__()
def __repr__(self):
return 'AstImport({!r})'.format(self.target)
class AstAlias(ASTNode):
def __init__(self, path, lineno, lexpos, name, type_ref, doc):
"""
Args:
name (str): The name of the alias.
type_ref (AstTypeRef): The data type being aliased.
doc (Optional[str]): Documentation string for the alias.
"""
super(AstAlias, self).__init__(path, lineno, lexpos)
self.name = name
self.type_ref = type_ref
self.doc = doc
self.annotations = []
def set_annotations(self, annotations):
self.annotations = annotations
def __repr__(self):
return 'AstAlias({!r}, {!r})'.format(self.name, self.type_ref)
class AstTypeDef(ASTNode):
def __init__(self, path, lineno, lexpos, name, extends, doc, fields,
examples):
"""
Args:
name (str): Name assigned to the type.
extends (Optional[AstTypeRef]): The type this inherits from.
doc (Optional[str]): Docstring for the type.
fields (List[AstField]): Fields of a type, not including
inherited ones.
examples (Optional[OrderedDict[str, AstExample]]): Map from label
to example.
"""
super(AstTypeDef, self).__init__(path, lineno, lexpos)
self.name = name
assert isinstance(extends, (AstTypeRef, type(None))), type(extends)
self.extends = extends
assert isinstance(doc, (six.text_type, type(None)))
self.doc = doc
assert isinstance(fields, list)
self.fields = fields
assert isinstance(examples, (OrderedDict, type(None))), type(examples)
self.examples = examples
def __str__(self):
return self.__repr__()
def __repr__(self):
return 'AstTypeDef({!r}, {!r}, {!r})'.format(
self.name,
self.extends,
self.fields,
)
class AstStructDef(AstTypeDef):
def __init__(self, path, lineno, lexpos, name, extends, doc, fields,
examples, subtypes=None):
"""
Args:
subtypes (Tuple[List[AstSubtypeField], bool]): Inner list
enumerates subtypes. The bool indicates whether this struct
is a catch-all.
See AstTypeDef for other constructor args.
"""
super(AstStructDef, self).__init__(
path, lineno, lexpos, name, extends, doc, fields, examples)
assert isinstance(subtypes, (tuple, type(None))), type(subtypes)
self.subtypes = subtypes
def __repr__(self):
return 'AstStructDef({!r}, {!r}, {!r})'.format(
self.name,
self.extends,
self.fields,
)
class AstStructPatch(ASTNode):
def __init__(self, path, lineno, lexpos, name, fields, examples):
super(AstStructPatch, self).__init__(path, lineno, lexpos)
self.name = name
assert isinstance(fields, list)
self.fields = fields
assert isinstance(examples, (OrderedDict, type(None))), type(examples)
self.examples = examples
def __repr__(self):
return 'AstStructPatch({!r}, {!r})'.format(
self.name,
self.fields,
)
class AstUnionDef(AstTypeDef):
def __init__(self, path, lineno, lexpos, name, extends, doc, fields,
examples, closed=False):
"""
Args:
closed (bool): Set if this is a closed union.
See AstTypeDef for other constructor args.
"""
super(AstUnionDef, self).__init__(
path, lineno, lexpos, name, extends, doc, fields, examples)
self.closed = closed
def __repr__(self):
return 'AstUnionDef({!r}, {!r}, {!r}, {!r})'.format(
self.name,
self.extends,
self.fields,
self.closed,
)
class AstUnionPatch(ASTNode):
def __init__(self, path, lineno, lexpos, name, fields, examples, closed):
super(AstUnionPatch, self).__init__(path, lineno, lexpos)
self.name = name
assert isinstance(fields, list)
self.fields = fields
assert isinstance(examples, (OrderedDict, type(None))), type(examples)
self.examples = examples
self.closed = closed
def __repr__(self):
return 'AstUnionPatch({!r}, {!r}, {!r})'.format(
self.name,
self.fields,
self.closed,
)
class AstTypeRef(ASTNode):
def __init__(self, path, lineno, lexpos, name, args, nullable, ns):
"""
Args:
name (str): Name of the referenced type.
args (tuple[list, dict]): Positional and keyword arguments to the type.
nullable (bool): Whether the type is nullable (can be null).
ns (Optional[str]): Namespace that the referred type is a member of.
If None, then it refers to the current namespace.
"""
super(AstTypeRef, self).__init__(path, lineno, lexpos)
self.name = name
self.args = args
self.nullable = nullable
self.ns = ns
def __repr__(self):
return 'AstTypeRef({!r}, {!r}, {!r}, {!r})'.format(
self.name,
self.args,
self.nullable,
self.ns,
)
class AstTagRef(ASTNode):
def __init__(self, path, lineno, lexpos, tag):
"""
Args:
tag (str): Name of the referenced tag.
"""
super(AstTagRef, self).__init__(path, lineno, lexpos)
self.tag = tag
def __repr__(self):
return 'AstTagRef({!r})'.format(
self.tag,
)
class AstAnnotationRef(ASTNode):
def __init__(self, path, lineno, lexpos, annotation, ns):
"""
Args:
annotation (str): Name of the referenced annotation.
ns (Optional[str]): Namespace the referenced annotation is defined in.
If None, the current namespace is assumed.
"""
super(AstAnnotationRef, self).__init__(path, lineno, lexpos)
self.annotation = annotation
self.ns = ns
def __repr__(self):
return 'AstAnnotationRef({!r}, {!r})'.format(
self.annotation, self.ns
)
class AstAnnotationDef(ASTNode):
def __init__(self, path, lineno, lexpos, name, annotation_type,
annotation_type_ns, args, kwargs):
"""
Args:
name (str): Name of the defined annotation.
annotation_type (str): Type of annotation to define.
annotation_type_ns (Optional[str]): Namespace where the annotation
type was defined. If None, current namespace or builtin.
args (list): Positional arguments used to define the annotation.
kwargs (dict): Keyword arguments used to define the annotation.
"""
super(AstAnnotationDef, self).__init__(path, lineno, lexpos)
self.name = name
self.annotation_type = annotation_type
self.annotation_type_ns = annotation_type_ns
self.args = args
self.kwargs = kwargs
def __repr__(self):
return 'AstAnnotationDef({!r}, {!r}, {!r}, {!r}, {!r})'.format(
self.name,
self.annotation_type,
self.annotation_type_ns,
self.args,
self.kwargs,
)
class AstAnnotationTypeDef(ASTNode):
def __init__(self, path, lineno, lexpos, name, doc, params):
"""
Args:
name (str): Name of the defined annotation type.
doc (str): Docstring for the defined annotation type.
params (List[AstField]): Parameters that can be passed to the
annotation type.
"""
super(AstAnnotationTypeDef, self).__init__(path, lineno, lexpos)
self.name = name
self.doc = doc
self.params = params
def __repr__(self):
return 'AstAnnotationTypeDef({!r}, {!r}, {!r})'.format(
self.name,
self.doc,
self.params,
)
class AstField(ASTNode):
"""
Represents both a field of a struct and a field of a union.
TODO(kelkabany): Split this into two different classes.
"""
def __init__(self, path, lineno, lexpos, name, type_ref):
"""
Args:
name (str): The name of the field.
type_ref (AstTypeRef): The data type of the field.
"""
super(AstField, self).__init__(path, lineno, lexpos)
self.name = name
self.type_ref = type_ref
self.doc = None
self.has_default = False
self.default = None
self.annotations = []
def set_doc(self, docstring):
self.doc = docstring
def set_default(self, default):
self.has_default = True
self.default = default
def set_annotations(self, annotations):
self.annotations = annotations
def __repr__(self):
return 'AstField({!r}, {!r}, {!r})'.format(
self.name,
self.type_ref,
self.annotations,
)
class AstVoidField(ASTNode):
def __init__(self, path, lineno, lexpos, name):
super(AstVoidField, self).__init__(path, lineno, lexpos)
self.name = name
self.doc = None
self.annotations = []
def set_doc(self, docstring):
self.doc = docstring
def set_annotations(self, annotations):
self.annotations = annotations
def __str__(self):
return self.__repr__()
def __repr__(self):
return 'AstVoidField({!r}, {!r})'.format(
self.name,
self.annotations,
)
class AstSubtypeField(ASTNode):
def __init__(self, path, lineno, lexpos, name, type_ref):
super(AstSubtypeField, self).__init__(path, lineno, lexpos)
self.name = name
self.type_ref = type_ref
def __repr__(self):
return 'AstSubtypeField({!r}, {!r})'.format(
self.name,
self.type_ref,
)
class AstRouteDef(ASTNode):
def __init__(self, path, lineno, lexpos, name, version, deprecated,
arg_type_ref, result_type_ref, error_type_ref=None):
super(AstRouteDef, self).__init__(path, lineno, lexpos)
self.name = name
self.version = version
self.deprecated = deprecated
self.arg_type_ref = arg_type_ref
self.result_type_ref = result_type_ref
self.error_type_ref = error_type_ref
self.doc = None
self.attrs = {}
def set_doc(self, docstring):
self.doc = docstring
def set_attrs(self, attrs):
self.attrs = attrs
class AstAttrField(ASTNode):
def __init__(self, path, lineno, lexpos, name, value):
super(AstAttrField, self).__init__(path, lineno, lexpos)
self.name = name
self.value = value
def __repr__(self):
return 'AstAttrField({!r}, {!r})'.format(
self.name,
self.value,
)
class AstExample(ASTNode):
def __init__(self, path, lineno, lexpos, label, text, fields):
super(AstExample, self).__init__(path, lineno, lexpos)
self.label = label
self.text = text
self.fields = fields
def __repr__(self):
return 'AstExample({!r}, {!r}, {!r})'.format(
self.label,
self.text,
self.fields,
)
class AstExampleField(ASTNode):
def __init__(self, path, lineno, lexpos, name, value):
super(AstExampleField, self).__init__(path, lineno, lexpos)
self.name = name
self.value = value
def __repr__(self):
return 'AstExampleField({!r}, {!r})'.format(
self.name,
self.value,
)
class AstExampleRef(ASTNode):
def __init__(self, path, lineno, lexpos, label):
super(AstExampleRef, self).__init__(path, lineno, lexpos)
self.label = label
def __repr__(self):
return 'AstExampleRef({!r})'.format(self.label)
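
The node classes above are plain containers that the parser emits. Below is a minimal hand-built sketch of how they compose; the file name 'users.stone', the line/offset positions, and the field names are made up purely for illustration.

from collections import OrderedDict

ns = AstNamespace('users.stone', 1, 0, 'users', doc=None)
name_ref = AstTypeRef('users.stone', 4, 9, 'String', args=([], {}), nullable=False, ns=None)
name_field = AstField('users.stone', 4, 4, 'name', name_ref)
name_field.set_doc('Display name of the account.')
account = AstStructDef('users.stone', 3, 0, 'Account', extends=None, doc=None,
                       fields=[name_field], examples=OrderedDict())
print(account)  # AstStructDef('Account', None, [AstField('name', AstTypeRef('String', ...), [])])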


@@ -0,0 +1,28 @@
import six
class InvalidSpec(Exception):
"""Raise this to indicate there was an error in a specification."""
def __init__(self, msg, lineno, path=None):
"""
Args:
msg: Error message intended for the spec writer to read.
lineno: The line number the error occurred on.
path: Path to the spec file with the error.
"""
super(InvalidSpec, self).__init__()
assert isinstance(msg, six.text_type), type(msg)
assert isinstance(lineno, (six.integer_types, type(None))), type(lineno)
self.msg = msg
self.lineno = lineno
self.path = path
def __str__(self):
return repr(self)
def __repr__(self):
return 'InvalidSpec({!r}, {!r}, {!r})'.format(
self.msg,
self.lineno,
self.path,
)
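
A short usage sketch (the message, line number, and path are hypothetical) showing how a caller might surface this exception to the spec author:

try:
    raise InvalidSpec(u'Unexpected token.', 12, 'users.stone')
except InvalidSpec as e:
    # msg, lineno, and path are stored as attributes for error reporting.
    print('%s:%s: error: %s' % (e.path, e.lineno, e.msg))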


@@ -0,0 +1,55 @@
import logging
from .exception import InvalidSpec
from .parser import (
ParserFactory,
)
from .ir_generator import IRGenerator
logger = logging.getLogger('stone.frontend.frontend')
# FIXME: Version should not have a default.
def specs_to_ir(specs, version='0.1b1', debug=False, route_whitelist_filter=None):
"""
Converts a collection of Stone specifications into the intermediate
representation used by Stone backends.
The process is: Lexer -> Parser -> Semantic Analyzer -> IR Generator.
The code is structured as:
1. Parser (Lexer embedded within)
2. IR Generator (Semantic Analyzer embedded within)
:type specs: List[Tuple[path: str, text: str]]
:param specs: `path` is never accessed and is only used to report the
location of a bad spec to the user. `text` is the text contents of
a spec (.stone) file.
:raises: InvalidSpec
:returns: stone.ir.Api
"""
parser_factory = ParserFactory(debug=debug)
partial_asts = []
for path, text in specs:
logger.info('Parsing spec %s', path)
parser = parser_factory.get_parser()
if debug:
parser.test_lexing(text)
partial_ast = parser.parse(text, path)
if parser.got_errors_parsing():
# TODO(kelkabany): Show more than one error at a time.
msg, lineno, path = parser.get_errors()[0]
raise InvalidSpec(msg, lineno, path)
elif len(partial_ast) == 0:
logger.info('Empty spec: %s', path)
else:
partial_asts.append(partial_ast)
return IRGenerator(partial_asts, version, debug=debug,
route_whitelist_filter=route_whitelist_filter).generate_IR()
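
A minimal sketch of driving specs_to_ir with one in-memory spec; the spec text is illustrative, and it assumes the returned stone.ir.Api object exposes a namespaces mapping.

spec_text = (
    'namespace users\n'
    '\n'
    'struct Account\n'
    '    name String\n'
    '        "Display name of the account."\n'
)
api = specs_to_ir([('users.stone', spec_text)])
print(list(api.namespaces.keys()))  # expected: ['users']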

File diff suppressed because it is too large


@@ -0,0 +1,446 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
import os
import ply.lex as lex
_MYPY = False
if _MYPY:
import typing # noqa: F401 # pylint: disable=import-error,unused-import,useless-suppression
class MultiToken(object):
"""Object used to monkeypatch ply.lex so that we can return multiple
tokens from one lex operation."""
def __init__(self, tokens):
self.type = tokens[0].type
self.tokens = tokens
# Represents a null value. We want to differentiate between the Python "None"
# and null in several places.
NullToken = object()
class Lexer(object):
"""
Lexer. Tokenizes stone files.
"""
states = (
('WSIGNORE', 'inclusive'),
)
def __init__(self):
self.lex = None
self.tokens_queue = None
# The current indentation "level" rather than a count of spaces.
self.cur_indent = None
self._logger = logging.getLogger('stone.stone.lexer')
self.last_token = None
# [(character, line number), ...]
self.errors = []
def input(self, file_data, **kwargs):
"""
Required by ply.yacc for this to quack (duck typing) like a ply lexer.
:param str file_data: Contents of the file to lex.
"""
self.lex = lex.lex(module=self, **kwargs)
self.tokens_queue = []
self.cur_indent = 0
# Hack to avoid tokenization bugs caused by files that do not end in a
# new line.
self.lex.input(file_data + '\n')
def token(self):
"""
Returns the next LexToken. Returns None when all tokens have been
exhausted.
"""
if self.tokens_queue:
self.last_token = self.tokens_queue.pop(0)
else:
r = self.lex.token()
if isinstance(r, MultiToken):
self.tokens_queue.extend(r.tokens)
self.last_token = self.tokens_queue.pop(0)
else:
if r is None and self.cur_indent > 0:
if (self.last_token and
self.last_token.type not in ('NEWLINE', 'LINE')):
newline_token = _create_token(
'NEWLINE', '\n', self.lex.lineno, self.lex.lexpos)
self.tokens_queue.append(newline_token)
dedent_count = self.cur_indent
dedent_token = _create_token(
'DEDENT', '\t', self.lex.lineno, self.lex.lexpos)
self.tokens_queue.extend([dedent_token] * dedent_count)
self.cur_indent = 0
self.last_token = self.tokens_queue.pop(0)
else:
self.last_token = r
return self.last_token
def test(self, data):
"""Logs all tokens for human inspection. Useful for debugging."""
self.input(data)
while True:
token = self.token()
if not token:
break
self._logger.debug('Token %r', token)
# List of token names
tokens = (
'ID',
'KEYWORD',
'PATH',
'DOT',
) # type: typing.Tuple[typing.Text, ...]
# Whitespace tokens
tokens += (
'DEDENT',
'INDENT',
'NEWLINE',
)
# Attribute lists, aliases
tokens += (
'COMMA',
'EQ',
'LPAR',
'RPAR',
)
# Primitive types
tokens += (
'BOOLEAN',
'FLOAT',
'INTEGER',
'NULL',
'STRING',
)
# List notation
tokens += (
'LBRACKET',
'RBRACKET',
)
# Map notation
tokens += (
'LBRACE',
'RBRACE',
'COLON',
)
tokens += (
'Q',
)
# Annotation notation
tokens += (
'AT',
)
# Regular expression rules for simple tokens
t_DOT = r'\.'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_EQ = r'='
t_COMMA = r','
t_Q = r'\?'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COLON = r'\:'
t_AT = r'@'
# TODO(kelkabany): Use scoped/conditional lexing to restrict where keywords
# are identified as such.
KEYWORDS = [
'alias',
'annotation',
'annotation_type',
'attrs',
'by',
'deprecated',
'doc',
'example',
'error',
'extends',
'import',
'namespace',
'patch',
'route',
'struct',
'union',
'union_closed',
]
RESERVED = {
'annotation': 'ANNOTATION',
'annotation_type': 'ANNOTATION_TYPE',
'attrs': 'ATTRS',
'deprecated': 'DEPRECATED',
'by': 'BY',
'extends': 'EXTENDS',
'import': 'IMPORT',
'patch': 'PATCH',
'route': 'ROUTE',
'struct': 'STRUCT',
'union': 'UNION',
'union_closed': 'UNION_CLOSED',
}
tokens += tuple(RESERVED.values())
def t_LPAR(self, token):
r'\('
token.lexer.push_state('WSIGNORE')
return token
def t_RPAR(self, token):
r'\)'
token.lexer.pop_state()
return token
def t_ANY_BOOLEAN(self, token):
r'\btrue\b|\bfalse\b'
token.value = (token.value == 'true')
return token
def t_ANY_NULL(self, token):
r'\bnull\b'
token.value = NullToken
return token
# No leading digits
def t_ANY_ID(self, token):
r'[a-zA-Z_][a-zA-Z0-9_-]*'
if token.value in self.KEYWORDS:
if (token.value == 'annotation_type') and self.cur_indent:
# annotation_type was added as a reserved keyword relatively
late, when there could be identifiers with the same name
in existing specs. Because annotation_type-the-keyword can
only be used at the beginning of a non-indented line, this
check lets both the keyword and the identifier coexist and
# maintains backward compatibility.
# Note: this is kind of a hack, and we should get rid of it if
# the lexer gets better at telling keywords from identifiers in general.
return token
token.type = self.RESERVED.get(token.value, 'KEYWORD')
return token
else:
return token
def t_ANY_PATH(self, token):
r'\/[/a-zA-Z0-9_-]*'
return token
def t_ANY_FLOAT(self, token):
r'-?\d+(\.\d*(e-?\d+)?|e-?\d+)'
token.value = float(token.value)
return token
def t_ANY_INTEGER(self, token):
r'-?\d+'
token.value = int(token.value)
return token
# Read in a string while respecting the following escape sequences:
# \", \\, \n, and \t.
def t_ANY_STRING(self, t):
r'\"([^\\"]|(\\.))*\"'
escaped = 0
t.lexer.lineno += t.value.count('\n')
s = t.value[1:-1]
new_str = ""
for i in range(0, len(s)):
c = s[i]
if escaped:
if c == 'n':
c = '\n'
elif c == 't':
c = '\t'
new_str += c
escaped = 0
else:
if c == '\\':
escaped = 1
else:
new_str += c
# remove current indentation
indentation_str = ' ' * _indent_level_to_spaces_count(self.cur_indent)
lines_without_indentation = [
line.replace(indentation_str, '', 1)
for line in new_str.splitlines()]
t.value = '\n'.join(lines_without_indentation)
return t
# Ignore comments.
# There are two types of comments.
# 1. Comments that take up a full line. These lines are ignored entirely.
# 2. Comments that come after tokens in the same line. These comments
# are ignored, but, we still need to emit a NEWLINE since this rule
# takes all trailing newlines.
# Regardless of comment type, the following line must be checked for a
# DEDENT or INDENT.
def t_INITIAL_comment(self, token):
r'[#][^\n]*\n+'
token.lexer.lineno += token.value.count('\n')
# Scan backwards from the comment hash to figure out which type of
comment this is. If we find a non-ws character, we know it was a
# partial line. But, if we find a newline before a non-ws character,
# then we know the entire line was a comment.
i = token.lexpos - 1
while i >= 0:
is_full_line_comment = token.lexer.lexdata[i] == '\n'
is_partial_line_comment = (not is_full_line_comment and
token.lexer.lexdata[i] != ' ')
if is_full_line_comment or is_partial_line_comment:
newline_token = _create_token('NEWLINE', '\n',
token.lineno, token.lexpos + len(token.value) - 1)
newline_token.lexer = token.lexer
dent_tokens = self._create_tokens_for_next_line_dent(
newline_token)
if is_full_line_comment:
# Comment takes the full line so ignore entirely.
return dent_tokens
elif is_partial_line_comment:
# Comment is only a partial line. Preserve newline token.
if dent_tokens:
dent_tokens.tokens.insert(0, newline_token)
return dent_tokens
else:
return newline_token
i -= 1
def t_WSIGNORE_comment(self, token):
r'[#][^\n]*\n+'
token.lexer.lineno += token.value.count('\n')
newline_token = _create_token('NEWLINE', '\n',
token.lineno, token.lexpos + len(token.value) - 1)
newline_token.lexer = token.lexer
self._check_for_indent(newline_token)
# Define a rule so we can track line numbers
def t_INITIAL_NEWLINE(self, newline_token):
r'\n+'
newline_token.lexer.lineno += newline_token.value.count('\n')
dent_tokens = self._create_tokens_for_next_line_dent(newline_token)
if dent_tokens:
dent_tokens.tokens.insert(0, newline_token)
return dent_tokens
else:
return newline_token
def t_WSIGNORE_NEWLINE(self, newline_token):
r'\n+'
newline_token.lexer.lineno += newline_token.value.count('\n')
self._check_for_indent(newline_token)
def _create_tokens_for_next_line_dent(self, newline_token):
"""
Starting from a newline token that isn't followed by another newline
token, returns any indent or dedent tokens that immediately follow.
If indentation doesn't change, returns None.
"""
indent_delta = self._get_next_line_indent_delta(newline_token)
if indent_delta is None or indent_delta == 0:
# Next line's indent isn't relevant OR there was no change in
# indentation.
return None
dent_type = 'INDENT' if indent_delta > 0 else 'DEDENT'
dent_token = _create_token(
dent_type, '\t', newline_token.lineno + 1,
newline_token.lexpos + len(newline_token.value))
tokens = [dent_token] * abs(indent_delta)
self.cur_indent += indent_delta
return MultiToken(tokens)
def _check_for_indent(self, newline_token):
"""
Checks that the line following a newline is indented, otherwise a
parsing error is generated.
"""
indent_delta = self._get_next_line_indent_delta(newline_token)
if indent_delta is None or indent_delta == 1:
# Next line's indent isn't relevant (e.g. it's a comment) OR
# next line is correctly indented.
return None
else:
self.errors.append(
('Line continuation must increment indent by 1.',
newline_token.lexer.lineno))
def _get_next_line_indent_delta(self, newline_token):
"""
Returns the change in indentation. The return units are in
indentations rather than spaces/tabs.
If the next line's indent isn't relevant (e.g. it's a comment),
returns None. Since the return value might be 0, the caller should
explicitly check the return type, rather than rely on truthiness.
"""
assert newline_token.type == 'NEWLINE', \
'Can only search for a dent starting from a newline.'
next_line_pos = newline_token.lexpos + len(newline_token.value)
if next_line_pos == len(newline_token.lexer.lexdata):
# Reached end of file
return None
line = newline_token.lexer.lexdata[next_line_pos:].split(os.linesep, 1)[0]
if not line:
return None
lstripped_line = line.lstrip()
lstripped_line_length = len(lstripped_line)
if lstripped_line_length == 0:
# If the next line is composed of only spaces, ignore indentation.
return None
if lstripped_line[0] == '#':
# If it's a comment line, ignore indentation.
return None
indent = len(line) - lstripped_line_length
if indent % 4 > 0:
self.errors.append(
('Indent is not divisible by 4.', newline_token.lexer.lineno))
return None
indent_delta = indent - _indent_level_to_spaces_count(self.cur_indent)
return indent_delta // 4
# A string containing ignored characters (spaces and tabs)
t_ignore = ' \t'
# Error handling rule
def t_ANY_error(self, token):
self._logger.debug('Illegal character %r at line %d',
token.value[0], token.lexer.lineno)
self.errors.append(
('Illegal character %s.' % repr(token.value[0]).lstrip('u'),
token.lexer.lineno))
token.lexer.skip(1)
def _create_token(token_type, value, lineno, lexpos):
"""
Helper for creating ply.lex.LexToken objects. Unfortunately, LexToken
does not have a constructor defined to make setting these values easy.
"""
token = lex.LexToken()
token.type = token_type
token.value = value
token.lineno = lineno
token.lexpos = lexpos
return token
def _indent_level_to_spaces_count(indent):
return indent * 4
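
A quick tokenization sketch using the Lexer above; the spec line and the token stream shown in the comment are illustrative:

lexer = Lexer()
lexer.input('namespace users\n')
while True:
    tok = lexer.token()
    if tok is None:
        break
    print(tok.type, tok.value)
# Roughly: KEYWORD 'namespace', ID 'users', NEWLINE '\n'
if lexer.errors:
    print('lex errors:', lexer.errors)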


@@ -0,0 +1,880 @@
from __future__ import absolute_import, division, print_function, unicode_literals
from collections import OrderedDict
import logging
import ply.yacc as yacc
from .lexer import (
Lexer,
NullToken,
)
from .ast import (
AstAlias,
AstAnnotationDef,
AstAnnotationRef,
AstAnnotationTypeDef,
AstAttrField,
AstExample,
AstExampleField,
AstExampleRef,
AstField,
AstNamespace,
AstImport,
AstRouteDef,
AstStructDef,
AstStructPatch,
AstSubtypeField,
AstTagRef,
AstTypeRef,
AstUnionDef,
AstUnionPatch,
AstVoidField,
)
logger = logging.getLogger(str('stone.frontend.parser'))
class ParserFactory(object):
"""
After instantiating a ParserFactory, call get_parser() to get an object
with a parse() method. It so happens that the object is also a
ParserFactory. The purpose of get_parser() is to reset the internal state
of the factory. The details for why these aren't cleanly separated have to
do with the inability to separate out the yacc.yacc BNF definition parser
from the class methods that implement the parser handling logic.
Due to how ply.yacc works, the docstring of each parser method is a BNF
rule. Comments that would normally be docstrings for each parser rule
method are kept before the method definition.
"""
# Ply parser requirement: Tokens must be re-specified in parser
tokens = Lexer.tokens
# Ply feature: Starting grammar rule
start = str('spec') # PLY wants a 'str' instance; this makes it work in Python 2 and 3
def __init__(self, debug=False):
self.debug = debug
self.yacc = yacc.yacc(module=self, debug=self.debug, write_tables=self.debug)
self.lexer = Lexer()
# [(token type, token value, line number), ...]
self.errors = []
# Path to file being parsed. This is added to each token for its
# utility in error reporting. But the path is never accessed, so this
# is optional.
self.path = None
self.anony_defs = []
self.exhausted = True
def get_parser(self):
"""
Returns a ParserFactory with the state reset so it can be used to
parse again.
:return: ParserFactory
"""
self.path = None
self.anony_defs = []
self.exhausted = False
return self
def parse(self, data, path=None):
"""
Args:
data (str): Raw specification text.
path (Optional[str]): Path to specification on filesystem. Only
used to tag tokens with the file they originated from.
"""
assert not self.exhausted, 'Must call get_parser() to reset state.'
self.path = path
parsed_data = self.yacc.parse(data, lexer=self.lexer, debug=self.debug)
# It generally makes sense for lexer errors to come first, because
# those can be the root of parser errors. Also, since we only show one
# error max right now, it's best to show the lexing one.
for err_msg, lineno in self.lexer.errors[::-1]:
self.errors.insert(0, (err_msg, lineno, self.path))
parsed_data.extend(self.anony_defs)
self.exhausted = True
return parsed_data
def test_lexing(self, data):
self.lexer.test(data)
def got_errors_parsing(self):
"""Whether the lexer or parser had errors."""
return self.errors
def get_errors(self):
"""
If got_errors_parsing() returns True, call this to get the errors.
Returns:
list[tuple[msg: str, lineno: int, path: str]]
"""
return self.errors[:]
# --------------------------------------------------------------
# Spec := Namespace Import* Definition*
def p_spec_init(self, p):
"""spec : NL
| empty"""
p[0] = []
def p_spec_init_decl(self, p):
"""spec : namespace
| import
| definition"""
p[0] = [p[1]]
def p_spec_iter(self, p):
"""spec : spec namespace
| spec import
| spec definition"""
p[0] = p[1]
p[0].append(p[2])
# This covers the case where we have garbage characters in a file that
# splits a NL token into two separate tokens.
def p_spec_ignore_newline(self, p):
'spec : spec NL'
p[0] = p[1]
def p_definition(self, p):
"""definition : alias
| annotation
| annotation_type
| struct
| struct_patch
| union
| union_patch
| route"""
p[0] = p[1]
def p_namespace(self, p):
"""namespace : KEYWORD ID NL
| KEYWORD ID NL INDENT docsection DEDENT"""
if p[1] == 'namespace':
doc = None
if len(p) > 4:
doc = p[5]
p[0] = AstNamespace(
self.path, p.lineno(1), p.lexpos(1), p[2], doc)
else:
raise ValueError('Expected namespace keyword')
def p_import(self, p):
'import : IMPORT ID NL'
p[0] = AstImport(self.path, p.lineno(1), p.lexpos(1), p[2])
def p_alias(self, p):
"""alias : KEYWORD ID EQ type_ref NL
| KEYWORD ID EQ type_ref NL INDENT annotation_ref_list docsection DEDENT"""
if p[1] == 'alias':
has_annotations = len(p) > 6 and p[7] is not None
doc = p[8] if len(p) > 6 else None
p[0] = AstAlias(
self.path, p.lineno(1), p.lexpos(1), p[2], p[4], doc)
if has_annotations:
p[0].set_annotations(p[7])
else:
raise ValueError('Expected alias keyword')
def p_nl(self, p):
'NL : NEWLINE'
p[0] = p[1]
# Sometimes we'll have multiple consecutive newlines that the lexer has
# trouble combining, so we do it in the parser.
def p_nl_combine(self, p):
'NL : NL NEWLINE'
p[0] = p[1]
# --------------------------------------------------------------
# Primitive Types
def p_primitive(self, p):
"""primitive : BOOLEAN
| FLOAT
| INTEGER
| NULL
| STRING"""
p[0] = p[1]
# --------------------------------------------------------------
# References to Types
#
# There are several places references to types are made:
# 1. Alias sources
# alias x = TypeRef
# 2. Field data types
# struct S
# f TypeRef
# 3. In arguments to type references
# struct S
# f TypeRef(key=TypeRef)
#
# A type reference can have positional and keyword arguments:
# TypeRef(value1, ..., kwarg1=kwvalue1)
# If it has no arguments, the parentheses can be omitted.
#
# If a type reference has a '?' suffix, it is a nullable type.
def p_pos_arg(self, p):
"""pos_arg : primitive
| type_ref"""
p[0] = p[1]
def p_pos_args_list_create(self, p):
"""pos_args_list : pos_arg"""
p[0] = [p[1]]
def p_pos_args_list_extend(self, p):
"""pos_args_list : pos_args_list COMMA pos_arg"""
p[0] = p[1]
p[0].append(p[3])
def p_kw_arg(self, p):
"""kw_arg : ID EQ primitive
| ID EQ type_ref"""
p[0] = {p[1]: p[3]}
def p_kw_args(self, p):
"""kw_args : kw_arg"""
p[0] = p[1]
def p_kw_args_update(self, p):
"""kw_args : kw_args COMMA kw_arg"""
p[0] = p[1]
for key in p[3]:
if key in p[1]:
msg = "Keyword argument '%s' defined more than once." % key
self.errors.append((msg, p.lineno(2), self.path))
p[0].update(p[3])
def p_args(self, p):
"""args : LPAR pos_args_list COMMA kw_args RPAR
| LPAR pos_args_list RPAR
| LPAR kw_args RPAR
| LPAR RPAR
| empty"""
if len(p) > 3:
if p[3] == ',':
p[0] = (p[2], p[4])
elif isinstance(p[2], dict):
p[0] = ([], p[2])
else:
p[0] = (p[2], {})
else:
p[0] = ([], {})
def p_field_nullable(self, p):
"""nullable : Q
| empty"""
p[0] = p[1] == '?'
def p_type_ref(self, p):
'type_ref : ID args nullable'
p[0] = AstTypeRef(
path=self.path,
lineno=p.lineno(1),
lexpos=p.lexpos(1),
name=p[1],
args=p[2],
nullable=p[3],
ns=None,
)
# A reference to a type in another namespace.
def p_foreign_type_ref(self, p):
'type_ref : ID DOT ID args nullable'
p[0] = AstTypeRef(
path=self.path,
lineno=p.lineno(1),
lexpos=p.lexpos(1),
name=p[3],
args=p[4],
nullable=p[5],
ns=p[1],
)
# --------------------------------------------------------------
# Annotation types
#
# An example annotation type:
#
# annotation_type Sensitive
# "This is a docstring for the annotation type"
#
# sensitivity Int32
#
# reason String?
# "This is a docstring for the field"
#
def p_annotation_type(self, p):
"""annotation_type : ANNOTATION_TYPE ID NL \
INDENT docsection field_list DEDENT"""
p[0] = AstAnnotationTypeDef(
path=self.path,
lineno=p.lineno(1),
lexpos=p.lexpos(1),
name=p[2],
doc=p[5],
params=p[6])
# --------------------------------------------------------------
# Structs
#
# An example struct looks as follows:
#
# struct S extends P
# "This is a docstring for the struct"
#
# typed_field String
# "This is a docstring for the field"
#
# An example struct that enumerates subtypes looks as follows:
#
# struct P
# union
# t1 S1
# t2 S2
# field String
#
# struct S1 extends P
# ...
#
# struct S2 extends P
# ...
#
def p_enumerated_subtypes(self, p):
"""enumerated_subtypes : uniont NL INDENT subtypes_list DEDENT
| empty"""
if len(p) > 2:
p[0] = (p[4], p[1][0] == 'union')
def p_struct(self, p):
"""struct : STRUCT ID inheritance NL \
INDENT docsection enumerated_subtypes field_list examples DEDENT"""
self.make_struct(p)
def p_anony_struct(self, p):
"""anony_def : STRUCT empty inheritance NL \
INDENT docsection enumerated_subtypes field_list examples DEDENT"""
self.make_struct(p)
def make_struct(self, p):
p[0] = AstStructDef(
path=self.path,
lineno=p.lineno(1),
lexpos=p.lexpos(1),
name=p[2],
extends=p[3],
doc=p[6],
subtypes=p[7],
fields=p[8],
examples=p[9])
def p_struct_patch(self, p):
"""struct_patch : PATCH STRUCT ID NL INDENT field_list examples DEDENT"""
p[0] = AstStructPatch(
path=self.path,
lineno=p.lineno(1),
lexpos=p.lexpos(1),
name=p[3],
fields=p[6],
examples=p[7])
def p_inheritance(self, p):
"""inheritance : EXTENDS type_ref
| empty"""
if p[1]:
if p[2].nullable:
msg = 'Reference cannot be nullable.'
self.errors.append((msg, p.lineno(1), self.path))
else:
p[0] = p[2]
def p_enumerated_subtypes_list_create(self, p):
"""subtypes_list : subtype_field
| empty"""
if p[1] is not None:
p[0] = [p[1]]
def p_enumerated_subtypes_list_extend(self, p):
'subtypes_list : subtypes_list subtype_field'
p[0] = p[1]
p[0].append(p[2])
def p_enumerated_subtype_field(self, p):
'subtype_field : ID type_ref NL'
p[0] = AstSubtypeField(
self.path, p.lineno(1), p.lexpos(1), p[1], p[2])
# --------------------------------------------------------------
# Fields
#
# Each struct has zero or more fields. A field has a name, type,
# and docstring.
#
# TODO(kelkabany): Split fields into struct fields and union fields
# since they differ in capabilities rather significantly now.
def p_field_list_create(self, p):
"""field_list : field
| empty"""
if p[1] is None:
p[0] = []
else:
p[0] = [p[1]]
def p_field_list_extend(self, p):
'field_list : field_list field'
p[0] = p[1]
p[0].append(p[2])
def p_default_option(self, p):
"""default_option : EQ primitive
| EQ tag_ref
| empty"""
if p[1]:
if isinstance(p[2], AstTagRef):
p[0] = p[2]
else:
p[0] = p[2]
def p_field(self, p):
"""field : ID type_ref default_option NL \
INDENT annotation_ref_list docsection anony_def_option DEDENT
| ID type_ref default_option NL"""
has_annotations = len(p) > 5 and p[6] is not None
has_docstring = len(p) > 5 and p[7] is not None
has_anony_def = len(p) > 5 and p[8] is not None
p[0] = AstField(
self.path, p.lineno(1), p.lexpos(1), p[1], p[2])
if p[3] is not None:
if p[3] is NullToken:
p[0].set_default(None)
else:
p[0].set_default(p[3])
if has_annotations:
p[0].set_annotations(p[6])
if has_docstring:
p[0].set_doc(p[7])
if has_anony_def:
p[8].name = p[2].name
self.anony_defs.append(p[8])
def p_anony_def_option(self, p):
"""anony_def_option : anony_def
| empty"""
p[0] = p[1]
def p_tag_ref(self, p):
'tag_ref : ID'
p[0] = AstTagRef(self.path, p.lineno(1), p.lexpos(1), p[1])
def p_annotation(self, p):
"""annotation : ANNOTATION ID EQ ID args NL
| ANNOTATION ID EQ ID DOT ID args NL"""
if len(p) < 8:
args, kwargs = p[5]
p[0] = AstAnnotationDef(
self.path, p.lineno(1), p.lexpos(1), p[2], p[4], None, args, kwargs)
else:
args, kwargs = p[7]
p[0] = AstAnnotationDef(
self.path, p.lineno(1), p.lexpos(1), p[2], p[6], p[4], args, kwargs)
def p_annotation_ref_list_create(self, p):
"""annotation_ref_list : annotation_ref
| empty"""
if p[1] is not None:
p[0] = [p[1]]
else:
p[0] = None
def p_annotation_ref_list_extend(self, p):
"""annotation_ref_list : annotation_ref_list annotation_ref"""
p[0] = p[1]
p[0].append(p[2])
def p_annotation_ref(self, p):
"""annotation_ref : AT ID NL
| AT ID DOT ID NL"""
if len(p) < 5:
p[0] = AstAnnotationRef(self.path, p.lineno(1), p.lexpos(1), p[2], None)
else:
p[0] = AstAnnotationRef(self.path, p.lineno(1), p.lexpos(1), p[4], p[2])
# --------------------------------------------------------------
# Unions
#
# An example union looks as follows:
#
# union U
# "This is a docstring for the union"
#
# void_field*
# "Docstring for field with type Void"
# typed_field String
#
# void_field demonstrates the notation for a catch-all variant.
def p_union(self, p):
"""union : uniont ID inheritance NL \
INDENT docsection field_list examples DEDENT"""
self.make_union(p)
def p_anony_union(self, p):
"""anony_def : uniont empty inheritance NL \
INDENT docsection field_list examples DEDENT"""
self.make_union(p)
def make_union(self, p):
p[0] = AstUnionDef(
path=self.path,
lineno=p[1][1],
lexpos=p[1][2],
name=p[2],
extends=p[3],
doc=p[6],
fields=p[7],
examples=p[8],
closed=p[1][0] == 'union_closed')
def p_union_patch(self, p):
"""union_patch : PATCH uniont ID NL INDENT field_list examples DEDENT"""
p[0] = AstUnionPatch(
path=self.path,
lineno=p[2][1],
lexpos=p[2][2],
name=p[3],
fields=p[6],
examples=p[7],
closed=p[2][0] == 'union_closed')
def p_uniont(self, p):
"""uniont : UNION
| UNION_CLOSED"""
p[0] = (p[1], p.lineno(1), p.lexpos(1))
def p_field_void(self, p):
"""field : ID NL
| ID NL INDENT annotation_ref_list docsection DEDENT"""
p[0] = AstVoidField(self.path, p.lineno(1), p.lexpos(1), p[1])
if len(p) > 3:
if p[4] is not None:
p[0].set_annotations(p[4])
if p[5] is not None:
p[0].set_doc(p[5])
# --------------------------------------------------------------
# Routes
#
# An example route looks as follows:
#
# route sample-route/sub-path:2 (arg, result, error)
# "This is a docstring for the route"
#
# attrs
# key="value"
#
# The error type is optional.
def p_route(self, p):
"""route : ROUTE route_name route_version route_io route_deprecation NL \
INDENT docsection attrssection DEDENT
| ROUTE route_name route_version route_io route_deprecation NL"""
p[0] = AstRouteDef(self.path, p.lineno(1), p.lexpos(1), p[2], p[3], p[5], *p[4])
if len(p) > 7:
p[0].set_doc(p[8])
if p[9]:
keys = set()
for attr in p[9]:
if attr.name in keys:
msg = "Attribute '%s' defined more than once." % attr.name
self.errors.append((msg, attr.lineno, attr.path))
keys.add(attr.name)
p[0].set_attrs(p[9])
def p_route_name(self, p):
'route_name : ID route_path'
if p[2]:
p[0] = p[1] + p[2]
else:
p[0] = p[1]
def p_route_path_suffix(self, p):
"""route_path : PATH
| empty"""
p[0] = p[1]
def p_route_version(self, p):
"""route_version : COLON INTEGER
| empty"""
if len(p) > 2:
if p[2] <= 0:
msg = "Version number should be a positive integer."
self.errors.append((msg, p.lineno(2), self.path))
p[0] = p[2]
else:
p[0] = 1
def p_route_io(self, p):
"""route_io : LPAR type_ref COMMA type_ref RPAR
| LPAR type_ref COMMA type_ref COMMA type_ref RPAR"""
if len(p) > 6:
p[0] = (p[2], p[4], p[6])
else:
p[0] = (p[2], p[4], None)
def p_route_deprecation(self, p):
"""route_deprecation : DEPRECATED
| DEPRECATED BY route_name route_version
| empty"""
if len(p) == 5:
p[0] = (True, p[3], p[4])
elif p[1]:
p[0] = (True, None, None)
def p_attrs_section(self, p):
"""attrssection : ATTRS NL INDENT attr_fields DEDENT
| empty"""
if p[1]:
p[0] = p[4]
def p_attr_fields_create(self, p):
'attr_fields : attr_field'
p[0] = [p[1]]
def p_attr_fields_add(self, p):
'attr_fields : attr_fields attr_field'
p[0] = p[1]
p[0].append(p[2])
def p_attr_field(self, p):
"""attr_field : ID EQ primitive NL
| ID EQ tag_ref NL"""
if p[3] is NullToken:
p[0] = AstAttrField(
self.path, p.lineno(1), p.lexpos(1), p[1], None)
else:
p[0] = AstAttrField(
self.path, p.lineno(1), p.lexpos(1), p[1], p[3])
# --------------------------------------------------------------
# Doc sections
#
# Doc sections appear after struct, union, and route signatures;
# also after field declarations.
#
# They're represented by text (multi-line supported) enclosed by
# quotations.
#
# struct S
# "This is a docstring
# for struct S"
#
# number Int64
# "This is a docstring for this field"
def p_docsection(self, p):
"""docsection : docstring NL
| empty"""
if p[1] is not None:
p[0] = p[1]
def p_docstring_string(self, p):
'docstring : STRING'
# Remove trailing whitespace on every line.
p[0] = '\n'.join([line.rstrip() for line in p[1].split('\n')])
# --------------------------------------------------------------
# Examples
#
# Examples appear at the bottom of struct definitions to give
# illustrative examples of what struct values may look like.
#
# struct S
# number Int64
#
# example default "This is a label"
# number=42
def p_examples_create(self, p):
"""examples : example
| empty"""
p[0] = OrderedDict()
if p[1] is not None:
p[0][p[1].label] = p[1]
def p_examples_add(self, p):
'examples : examples example'
p[0] = p[1]
if p[2].label in p[0]:
existing_ex = p[0][p[2].label]
self.errors.append(
("Example with label '%s' already defined on line %d." %
(existing_ex.label, existing_ex.lineno),
p[2].lineno, p[2].path))
p[0][p[2].label] = p[2]
# It's possible for no example fields to be specified.
def p_example(self, p):
"""example : KEYWORD ID NL INDENT docsection example_fields DEDENT
| KEYWORD ID NL"""
if len(p) > 4:
seen_fields = set()
for example_field in p[6]:
if example_field.name in seen_fields:
self.errors.append(
("Example with label '%s' defines field '%s' more "
"than once." % (p[2], example_field.name),
p.lineno(1), self.path))
seen_fields.add(example_field.name)
p[0] = AstExample(
self.path, p.lineno(1), p.lexpos(1), p[2], p[5],
OrderedDict((f.name, f) for f in p[6]))
else:
p[0] = AstExample(
self.path, p.lineno(1), p.lexpos(1), p[2], None, OrderedDict())
def p_example_fields_create(self, p):
'example_fields : example_field'
p[0] = [p[1]]
def p_example_fields_add(self, p):
'example_fields : example_fields example_field'
p[0] = p[1]
p[0].append(p[2])
def p_example_field(self, p):
"""example_field : ID EQ primitive NL
| ID EQ ex_list NL
| ID EQ ex_map NL"""
if p[3] is NullToken:
p[0] = AstExampleField(
self.path, p.lineno(1), p.lexpos(1), p[1], None)
else:
p[0] = AstExampleField(
self.path, p.lineno(1), p.lexpos(1), p[1], p[3])
def p_example_multiline(self, p):
"""example_field : ID EQ NL INDENT ex_map NL DEDENT"""
p[0] = AstExampleField(
self.path, p.lineno(1), p.lexpos(1), p[1], p[5])
def p_example_field_ref(self, p):
'example_field : ID EQ ID NL'
p[0] = AstExampleField(self.path, p.lineno(1), p.lexpos(1),
p[1], AstExampleRef(self.path, p.lineno(3), p.lexpos(3), p[3]))
# --------------------------------------------------------------
# Example of list
def p_ex_list(self, p):
"""ex_list : LBRACKET ex_list_items RBRACKET
| LBRACKET empty RBRACKET"""
if p[2] is None:
p[0] = []
else:
p[0] = p[2]
def p_ex_list_item_primitive(self, p):
'ex_list_item : primitive'
if p[1] is NullToken:
p[0] = None
else:
p[0] = p[1]
def p_ex_list_item_id(self, p):
'ex_list_item : ID'
p[0] = AstExampleRef(self.path, p.lineno(1), p.lexpos(1), p[1])
def p_ex_list_item_list(self, p):
'ex_list_item : ex_list'
p[0] = p[1]
def p_ex_list_items_create(self, p):
"""ex_list_items : ex_list_item"""
p[0] = [p[1]]
def p_ex_list_items_extend(self, p):
"""ex_list_items : ex_list_items COMMA ex_list_item"""
p[0] = p[1]
p[0].append(p[3])
# --------------------------------------------------------------
# Maps
#
def p_ex_map(self, p):
"""ex_map : LBRACE ex_map_pairs RBRACE
| LBRACE empty RBRACE"""
p[0] = p[2] or {}
def p_ex_map_multiline(self, p):
"""ex_map : LBRACE NL INDENT ex_map_pairs NL DEDENT RBRACE"""
p[0] = p[4] or {}
def p_ex_map_elem_primitive(self, p):
"""ex_map_elem : primitive"""
p[0] = None if p[1] == NullToken else p[1]
def p_ex_map_elem_composit(self, p):
"""ex_map_elem : ex_map
| ex_list"""
p[0] = p[1]
def p_ex_map_elem_id(self, p):
"""ex_map_elem : ID"""
p[0] = AstExampleRef(self.path, p.lineno(1), p.lexpos(1), p[1])
def p_ex_map_pair(self, p):
"""ex_map_pair : ex_map_elem COLON ex_map_elem"""
try:
p[0] = {p[1]: p[3]}
except TypeError:
msg = u"%s is an invalid hash key because it cannot be hashed." % repr(p[1])
self.errors.append((msg, p.lineno(2), self.path))
p[0] = {}
def p_ex_map_pairs_create(self, p):
"""ex_map_pairs : ex_map_pair """
p[0] = p[1]
def p_ex_map_pairs_extend(self, p):
"""ex_map_pairs : ex_map_pairs COMMA ex_map_pair"""
p[0] = p[1]
p[0].update(p[3])
def p_ex_map_pairs_multiline(self, p):
"""ex_map_pairs : ex_map_pairs COMMA NL ex_map_pair"""
p[0] = p[1]
p[0].update(p[4])
# --------------------------------------------------------------
# In ply, this is how you define an empty rule. This is used when we want
# the parser to treat a rule as optional.
def p_empty(self, p):
'empty :'
# Called by the parser whenever a token doesn't match any rule.
def p_error(self, token):
assert token is not None, "Unknown error, please report this."
logger.debug('Unexpected %s(%r) at line %d',
token.type,
token.value,
token.lineno)
self.errors.append(
("Unexpected %s with value %s." %
(token.type, repr(token.value).lstrip('u')),
token.lineno, self.path))
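
And a matching sketch for the parser: get a parser from the factory, parse one small spec (the text is illustrative), and check for errors before using the partial AST.

factory = ParserFactory()
parser = factory.get_parser()
partial_ast = parser.parse('namespace users\n', path='users.stone')
if parser.got_errors_parsing():
    msg, lineno, path = parser.get_errors()[0]
    print('%s:%s: %s' % (path, lineno, msg))
else:
    print(partial_ast)  # e.g. [AstNamespace('users')]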