hat.peg

PEG Parser

Implementation of PEG parser as described in paper "Parsing Expression Grammars: A Recognition-Based Syntactic Foundation" (Bryan Ford, 2004). PEG's grammar itself can be defined by PEG grammar::

# Hierarchical syntax
Grammar    <- Spacing Definition+ EndOfFile
Definition <- Identifier LEFTARROW Expression
Expression <- Sequence (SLASH Sequence)*
Sequence   <- Prefix*
Prefix     <- (AND / NOT)? Suffix
Suffix     <- Primary (QUESTION / STAR / PLUS)?
Primary    <- Identifier !LEFTARROW
            / OPEN Expression CLOSE
            / Literal
            / Class
            / DOT

# Lexical syntax
Identifier <- IdentStart IdentCont* Spacing
IdentStart <- [a-zA-Z_]
IdentCont  <- IdentStart / [0-9]
Literal    <- ['] (!['] Char)* ['] Spacing
            / ["] (!["] Char)* ["] Spacing
Class      <- '[' (!']' Range)* ']' Spacing
Range      <- Char '-' Char / Char
Char       <- '\\' [nrt'"\[\]\\]
            / '\\' 'x' Hex Hex
            / '\\' 'u' Hex Hex Hex Hex
            / !'\\' .
Hex        <- [0-9a-fA-F]

LEFTARROW  <- '<-' Spacing
SLASH      <- '/' Spacing
AND        <- '&' Spacing
NOT        <- '!' Spacing
QUESTION   <- '?' Spacing
STAR       <- '*' Spacing
PLUS       <- '+' Spacing
OPEN       <- '(' Spacing
CLOSE      <- ')' Spacing
DOT        <- '.' Spacing

Spacing    <- (Space / Comment)*
Comment    <- '#' (!EndOfLine .)* EndOfLine
Space      <- ' ' / '\t' / EndOfLine
EndOfLine  <- '\r\n' / '\n' / '\r'
EndOfFile  <- !.

Example usage of PEG parser::

import functools
import hat.peg

grammar = hat.peg.Grammar(r'''
    Expr    <- Sum
    Sum     <- Product (('+' / '-') Product)*
    Product <- Value (('*' / '/') Value)*
    Value   <- Spacing ([0-9]+ / '(' Expr ')') Spacing
    Spacing <- ' '*
''', 'Expr')

ast = grammar.parse('1 + 2 * 3 + (4 - 5) * 6 + 7')

result = hat.peg.walk_ast(ast, {
    'Expr': lambda n, c: c[0],
    'Sum': lambda n, c: functools.reduce(
        (lambda acc, x: acc + x[1] if x[0] == '+' else acc - x[1]),
        zip(c[1::2], c[2::2]),
        c[0]),
    'Product': lambda n, c: functools.reduce(
        (lambda acc, x: acc * x[1] if x[0] == '*' else acc / x[1]),
        zip(c[1::2], c[2::2]),
        c[0]),
    'Value': lambda n, c: (c[2] if c[1] == '(' else
                           int(''.join(c[1:-1])))})

assert result == 8
  1r"""PEG Parser
  2
  3Implementation of PEG parser as described in paper "Parsing Expression
  4Grammars: A Recognition-Based Syntactic Foundation" (Bryan Ford, 2004).
  5PEG's grammar itself can be defined by PEG grammar::
  6
  7    # Hierarchical syntax
  8    Grammar    <- Spacing Definition+ EndOfFile
  9    Definition <- Identifier LEFTARROW Expression
 10    Expression <- Sequence (SLASH Sequence)*
 11    Sequence   <- Prefix*
 12    Prefix     <- (AND / NOT)? Suffix
 13    Suffix     <- Primary (QUESTION / STAR / PLUS)?
 14    Primary    <- Identifier !LEFTARROW
 15                / OPEN Expression CLOSE
 16                / Literal
 17                / Class
 18                / DOT
 19
 20    # Lexical syntax
 21    Identifier <- IdentStart IdentCont* Spacing
 22    IdentStart <- [a-zA-Z_]
 23    IdentCont  <- IdentStart / [0-9]
 24    Literal    <- ['] (!['] Char)* ['] Spacing
 25                / ["] (!["] Char)* ["] Spacing
 26    Class      <- '[' (!']' Range)* ']' Spacing
 27    Range      <- Char '-' Char / Char
 28    Char       <- '\\' [nrt'"\[\]\\]
 29                / '\\' 'x' Hex Hex
 30                / '\\' 'u' Hex Hex Hex Hex
 31                / !'\\' .
 32    Hex        <- [0-9a-fA-F]
 33
 34    LEFTARROW  <- '<-' Spacing
 35    SLASH      <- '/' Spacing
 36    AND        <- '&' Spacing
 37    NOT        <- '!' Spacing
 38    QUESTION   <- '?' Spacing
 39    STAR       <- '*' Spacing
 40    PLUS       <- '+' Spacing
 41    OPEN       <- '(' Spacing
 42    CLOSE      <- ')' Spacing
 43    DOT        <- '.' Spacing
 44
 45    Spacing    <- (Space / Comment)*
 46    Comment    <- '#' (!EndOfLine .)* EndOfLine
 47    Space      <- ' ' / '\t' / EndOfLine
 48    EndOfLine  <- '\r\n' / '\n' / '\r'
 49    EndOfFile  <- !.
 50
 51Example usage of PEG parser::
 52
 53    import functools
 54    import hat.peg
 55
 56    grammar = hat.peg.Grammar(r'''
 57        Expr    <- Sum
 58        Sum     <- Product (('+' / '-') Product)*
 59        Product <- Value (('*' / '/') Value)*
 60        Value   <- Spacing ([0-9]+ / '(' Expr ')') Spacing
 61        Spacing <- ' '*
 62    ''', 'Expr')
 63
 64    ast = grammar.parse('1 + 2 * 3 + (4 - 5) * 6 + 7')
 65
 66    result = hat.peg.walk_ast(ast, {
 67        'Expr': lambda n, c: c[0],
 68        'Sum': lambda n, c: functools.reduce(
 69            (lambda acc, x: acc + x[1] if x[0] == '+' else acc - x[1]),
 70            zip(c[1::2], c[2::2]),
 71            c[0]),
 72        'Product': lambda n, c: functools.reduce(
 73            (lambda acc, x: acc * x[1] if x[0] == '*' else acc / x[1]),
 74            zip(c[1::2], c[2::2]),
 75            c[0]),
 76        'Value': lambda n, c: (c[2] if c[1] == '(' else
 77                               int(''.join(c[1:-1])))})
 78
 79    assert result == 8
 80
 81"""
 82
 83import typing
 84
 85from hat import util
 86
 87
 88class Node(typing.NamedTuple):
 89    """Abstract syntax tree node.
 90
 91    Node names are identifiers of parser's definitions and values
 92    are other nodes or string values representing matched `Literal`, `Class`
 93    or `Dot` leafs.
 94
 95    """
 96    name: str
 97    value: typing.List[typing.Union['Node', str]]
 98
 99
100Action = typing.Callable[[Node, typing.List], typing.Any]
101"""Action"""
102
103
104def walk_ast(node: Node,
105             actions: typing.Dict[str, Action],
106             default_action: typing.Optional[Action] = None
107             ) -> typing.Any:
108    """Simple depth-first abstract syntax tree parser.
109
110    Actions are key value pairs where keys represent node names and values
111    are callables that should be called for appropriate node. Each callable
112    receives matched node and list of results from recursively applying
113    this function on child nodes. For nodes which name doesn't match any
114    action, default action is used. If default action is not defined and node
115    name doesn't match action, result is None and recursion is stopped.
116
117    """
118    action = actions.get(node.name, default_action)
119    if not action:
120        return
121    children = [walk_ast(i, actions, default_action)
122                if isinstance(i, Node) else i
123                for i in node.value]
124    return action(node, children)
125
126
127class Sequence(typing.NamedTuple):
128    expressions: typing.List['Expression']
129
130
131class Choice(typing.NamedTuple):
132    expressions: typing.List['Expression']
133
134
135class Not(typing.NamedTuple):
136    expression: 'Expression'
137
138
139class And(typing.NamedTuple):
140    expression: 'Expression'
141
142
143class OneOrMore(typing.NamedTuple):
144    expression: 'Expression'
145
146
147class ZeroOrMore(typing.NamedTuple):
148    expression: 'Expression'
149
150
151class Optional(typing.NamedTuple):
152    expression: 'Expression'
153
154
155class Identifier(typing.NamedTuple):
156    value: str
157
158
159class Literal(typing.NamedTuple):
160    value: str
161
162
163class Class(typing.NamedTuple):
164    values: typing.List[typing.Union[str, typing.Tuple[str, str]]]
165
166
167class Dot(typing.NamedTuple):
168    pass
169
170
171Expression = typing.Union[Sequence, Choice, Not, And, OneOrMore, ZeroOrMore,
172                          Optional, Identifier, Literal, Class, Dot]
173"""Expression"""
174
175
176class MatchResult(typing.NamedTuple):
177    """Match result
178
179    Args:
180        node: matched node or ``None`` if match failed
181        rest: remaining input data
182
183    """
184    node: typing.Optional[Node]
185    rest: str
186
187
188class MatchCallFrame(typing.NamedTuple):
189    """Match call frame
190
191    Args:
192        name: definition name
193        data: input data
194
195    """
196    name: str
197    data: str
198
199
200MatchCallStack = typing.Iterable[MatchCallFrame]
201"""Match call stack"""
202
203
204DebugCb = typing.Callable[[MatchResult, MatchCallStack], None]
205"""Debug callback"""
206
207
208class Grammar:
209    """PEG Grammar.
210
211    Args:
212        definitions: grammar definitions
213        starting: starting definition name
214
215    """
216
217    def __init__(self,
218                 definitions: typing.Union[str,
219                                           typing.Dict[str, Expression]],
220                 starting: str):
221        if isinstance(definitions, str):
222            ast = _peg_grammar.parse(definitions)
223            definitions = walk_ast(ast, _peg_actions)
224            definitions = {k: _reduce_expression(v)
225                           for k, v in definitions.items()}
226        self._definitions = definitions
227        self._starting = starting
228
229    @property
230    def definitions(self) -> typing.Dict[str, Expression]:
231        """Definitions"""
232        return self._definitions
233
234    @property
235    def starting(self) -> str:
236        """Starting definition name"""
237        return self._starting
238
239    def parse(self,
240              data: str,
241              debug_cb: typing.Optional[DebugCb] = None
242              ) -> Node:
243        """Parse input data.
244
245        `debug_cb` is optional function which can be used for monitoring and
246        debugging parse steps. It is called each time named definition
247        is successfully or unsuccessfully matched. This function receives
248        match result and match call stack.
249
250        """
251        state = _State(grammar=self,
252                       data=data,
253                       call_stack=_MatchCallStack(None, None),
254                       debug_cb=debug_cb)
255        result = _match(state, self._starting)
256        if result.node is None:
257            raise Exception("could not match starting definition")
258        x = _reduce_node(result.node)
259        return x
260
261
262def console_debug_cb(result: MatchResult, call_stack: MatchCallStack):
263    """Simple console debugger."""
264    success = '+++' if result.node else '---'
265    stack = ', '.join(frame.name for frame in call_stack)
266    consumed = util.first(call_stack).data[:-len(result.rest)]
267    print(success, stack)
268    print('<<<', consumed)
269    print('>>>', result.rest, flush=True)
270
271
272class _MatchCallStack(typing.NamedTuple):
273    frame: typing.Optional[MatchCallFrame]
274    previous: typing.Optional['_MatchCallStack']
275
276    def __iter__(self):
277        current = self
278        while current and current.frame:
279            yield current.frame
280            current = current.previous
281
282
283class _State(typing.NamedTuple):
284    grammar: Grammar
285    data: str
286    call_stack: _MatchCallStack
287    debug_cb: DebugCb
288
289
290def _match(state, name):
291    call_frame = MatchCallFrame(name, state.data)
292    if call_frame in state.call_stack:
293        raise Exception('Infinite matching recursion detected')
294    state = state._replace(
295        call_stack=_MatchCallStack(call_frame, state.call_stack))
296    result = _match_expression(state, state.grammar.definitions[name])
297    if result.node:
298        result = result._replace(
299            node=result.node._replace(name=name))
300    if state.debug_cb:
301        debug_result = MatchResult(
302            node=(_reduce_node(result.node) if result.node else None),
303            rest=result.rest)
304        state.debug_cb(debug_result, state.call_stack)
305    return result
306
307
308def _match_expression(state, expression):
309    t = type(expression)
310    if t == Choice:
311        return _match_choice(state, expression)
312    if t == Sequence:
313        return _match_sequence(state, expression)
314    if t == Not:
315        return _match_not(state, expression)
316    if t == And:
317        return _match_and(state, expression)
318    if t == OneOrMore:
319        return _match_oneormore(state, expression)
320    if t == ZeroOrMore:
321        return _match_zeroormore(state, expression)
322    if t == Optional:
323        return _match_optional(state, expression)
324    if t == Identifier:
325        return _match_identifier(state, expression)
326    if t == Literal:
327        return _match_literal(state, expression)
328    if t == Class:
329        return _match_class(state, expression)
330    if t == Dot:
331        return _match_dot(state, expression)
332    raise ValueError('unsupported expression type')
333
334
335def _match_choice(state, expression):
336    for i in expression.expressions:
337        result = _match_expression(state, i)
338        if result.node:
339            return result
340    return MatchResult(None, state.data)
341
342
343def _match_sequence(state, expression):
344    nodes = []
345    for i in expression.expressions:
346        result = _match_expression(state, i)
347        if not result.node:
348            return MatchResult(None, state.data)
349        nodes.append(result.node)
350        state = state._replace(data=result.rest)
351    return MatchResult(Node(None, nodes), state.data)
352
353
354def _match_not(state, expression):
355    result = _match_expression(state, expression.expression)
356    return (MatchResult(None, state.data) if result.node
357            else MatchResult(Node(None, []), state.data))
358
359
360def _match_and(state, expression):
361    node = _match_expression(state, expression.expression).node
362    return MatchResult(Node(None, []) if node else None, state.data)
363
364
365def _match_oneormore(state, expression):
366    return _match_expression(state, Sequence([
367        expression.expression,
368        ZeroOrMore(expression.expression)]))
369
370
371def _match_zeroormore(state, expression):
372    nodes = []
373    while True:
374        result = _match_expression(state, expression.expression)
375        if not result.node:
376            return MatchResult(Node(None, nodes), state.data)
377        nodes.append(result.node)
378        state = state._replace(data=result.rest)
379
380
381def _match_optional(state, expression):
382    result = _match_expression(state, expression.expression)
383    if result.node:
384        return result
385    return MatchResult(Node(None, []), state.data)
386
387
388def _match_identifier(state, expression):
389    result = _match(state, expression.value)
390    if result.node:
391        result = result._replace(node=Node(None, [result.node]))
392    return result
393
394
395def _match_literal(state, expression):
396    data = state.data[:len(expression.value)]
397    if data != expression.value:
398        return MatchResult(None, state.data)
399    rest = state.data[len(expression.value):]
400    return MatchResult(Node(None, [data]), rest)
401
402
403def _match_class(state, expression):
404    if state.data:
405        for i in expression.values:
406            if isinstance(i, str):
407                matched = state.data[0] == i
408            else:
409                matched = i[0] <= state.data[0] <= i[1]
410            if matched:
411                return MatchResult(Node(None, [state.data[:1]]),
412                                   state.data[1:])
413    return MatchResult(None, state.data)
414
415
416def _match_dot(state, expression):
417    if not state.data:
418        return MatchResult(None, state.data)
419    return MatchResult(Node(None, [state.data[:1]]), state.data[1:])
420
421
422def _reduce_expression(expr):
423    if hasattr(expr, 'expressions'):
424        if len(expr.expressions) == 1:
425            return _reduce_expression(expr.expressions[0])
426        return expr._replace(expressions=[_reduce_expression(i)
427                                          for i in expr.expressions])
428    if hasattr(expr, 'expression'):
429        return expr._replace(expression=_reduce_expression(expr.expression))
430    return expr
431
432
433def _reduce_node(node):
434    return node._replace(value=list(_reduce_node_list(node.value)))
435
436
437def _reduce_node_list(nodes):
438    for node in nodes:
439        if not isinstance(node, Node):
440            yield node
441        elif node.name is None:
442            yield from _reduce_node_list(node.value)
443        else:
444            yield _reduce_node(node)
445
446
447_peg_grammar = Grammar({
448    'Grammar': Sequence([
449        Identifier('Spacing'),
450        OneOrMore(Identifier('Definition')),
451        Identifier('EndOfFile')]),
452    'Definition': Sequence([
453        Identifier('Identifier'),
454        Identifier('LEFTARROW'),
455        Identifier('Expression')]),
456    'Expression': Sequence([
457        Identifier('Sequence'),
458        ZeroOrMore(Sequence([
459            Identifier('SLASH'),
460            Identifier('Sequence')]))]),
461    'Sequence': ZeroOrMore(Identifier('Prefix')),
462    'Prefix': Sequence([
463        Optional(Choice([
464            Identifier('AND'),
465            Identifier('NOT')])),
466        Identifier('Suffix')]),
467    'Suffix': Sequence([
468        Identifier('Primary'),
469        Optional(Choice([
470            Identifier('QUESTION'),
471            Identifier('STAR'),
472            Identifier('PLUS')]))]),
473    'Primary': Choice([
474        Sequence([
475            Identifier('Identifier'),
476            Not(Identifier('LEFTARROW'))]),
477        Sequence([
478            Identifier('OPEN'),
479            Identifier('Expression'),
480            Identifier('CLOSE')]),
481        Identifier('Literal'),
482        Identifier('Class'),
483        Identifier('DOT')]),
484    'Identifier': Sequence([
485        Identifier('IdentStart'),
486        ZeroOrMore(Identifier('IdentCont')),
487        Identifier('Spacing')]),
488    'IdentStart': Class([('a', 'z'),
489                         ('A', 'Z'),
490                         '_']),
491    'IdentCont': Choice([
492        Identifier('IdentStart'),
493        Class([('0', '9')])]),
494    'Literal': Choice([
495        Sequence([
496            Class(["'"]),
497            ZeroOrMore(Sequence([
498                Not(Class(["'"])),
499                Identifier('Char')])),
500            Class(["'"]),
501            Identifier('Spacing')]),
502        Sequence([
503            Class(['"']),
504            ZeroOrMore(Sequence([
505                Not(Class(['"'])),
506                Identifier('Char')])),
507            Class(['"']),
508            Identifier('Spacing')])]),
509    'Class': Sequence([
510        Literal('['),
511        ZeroOrMore(Sequence([
512            Not(Literal(']')),
513            Identifier('Range')])),
514        Literal(']'),
515        Identifier('Spacing')]),
516    'Range': Choice([
517        Sequence([
518            Identifier('Char'),
519            Literal('-'),
520            Identifier('Char')]),
521        Identifier('Char')]),
522    'Char': Choice([
523        Sequence([
524            Literal('\\'),
525            Class(['n', 'r', 't', "'", '"', '[', ']', '\\'])]),
526        Sequence([
527            Literal('\\'),
528            Literal('x'),
529            Identifier('Hex'),
530            Identifier('Hex')]),
531        Sequence([
532            Literal('\\'),
533            Literal('u'),
534            Identifier('Hex'),
535            Identifier('Hex'),
536            Identifier('Hex'),
537            Identifier('Hex')]),
538        Sequence([
539            Not(Literal('\\')),
540            Dot()])]),
541    'Hex': Class([('0', '9'),
542                  ('a', 'f'),
543                  ('A', 'F')]),
544    'LEFTARROW': Sequence([
545        Literal('<-'),
546        Identifier('Spacing')]),
547    'SLASH': Sequence([
548        Literal('/'),
549        Identifier('Spacing')]),
550    'AND': Sequence([
551        Literal('&'),
552        Identifier('Spacing')]),
553    'NOT': Sequence([
554        Literal('!'),
555        Identifier('Spacing')]),
556    'QUESTION': Sequence([
557        Literal('?'),
558        Identifier('Spacing')]),
559    'STAR': Sequence([
560        Literal('*'),
561        Identifier('Spacing')]),
562    'PLUS': Sequence([
563        Literal('+'),
564        Identifier('Spacing')]),
565    'OPEN': Sequence([
566        Literal('('),
567        Identifier('Spacing')]),
568    'CLOSE': Sequence([
569        Literal(')'),
570        Identifier('Spacing')]),
571    'DOT': Sequence([
572        Literal('.'),
573        Identifier('Spacing')]),
574    'Spacing': ZeroOrMore(Choice([
575        Identifier('Space'),
576        Identifier('Comment')])),
577    'Comment': Sequence([
578        Literal('#'),
579        ZeroOrMore(Sequence([
580            Not(Identifier('EndOfLine')),
581            Dot()])),
582        Identifier('EndOfLine')]),
583    'Space': Choice([
584        Literal(' '),
585        Literal('\t'),
586        Identifier('EndOfLine')]),
587    'EndOfLine': Choice([
588        Literal('\r\n'),
589        Literal('\n'),
590        Literal('\r')]),
591    'EndOfFile': Not(Dot())
592}, 'Grammar')
593
594
595_peg_actions = {
596    'Grammar': lambda n, c: {k: v for k, v in filter(bool, c)},
597    'Definition': lambda n, c: (c[0].value, c[2]),
598    'Expression': lambda n, c: Choice(list(c[::2])),
599    'Sequence': lambda n, c: Sequence(c),
600    'Prefix': lambda n, c: (c[0] if len(c) == 1 else
601                            c[0](c[1])),
602    'Suffix': lambda n, c: (c[0] if len(c) == 1 else
603                            c[1](c[0])),
604    'Primary': lambda n, c: (c[1] if c[0] is None else
605                             c[0]),
606    'Identifier': lambda n, c: Identifier(''.join(c[:-1])),
607    'IdentStart': lambda n, c: c[0],
608    'IdentCont': lambda n, c: c[0],
609    'Literal': lambda n, c: Literal(''.join(c[1:-2])),
610    'Class': lambda n, c: Class(c[1:-2]),
611    'Range': lambda n, c: (c[0] if len(c) == 1 else
612                           (c[0], c[2])),
613    'Char': lambda n, c: (c[0] if c[0] != '\\' else
614                          '\n' if c[1] == 'n' else
615                          '\r' if c[1] == 'r' else
616                          '\t' if c[1] == 't' else
617                          "'" if c[1] == "'" else
618                          '"' if c[1] == '"' else
619                          '[' if c[1] == '[' else
620                          ']' if c[1] == ']' else
621                          '\\' if c[1] == '\\' else
622                          chr(int(c[2:], 16))),
623    'Hex': lambda n, c: c[0],
624    'AND': lambda n, c: And,
625    'NOT': lambda n, c: Not,
626    'QUESTION': lambda n, c: Optional,
627    'STAR': lambda n, c: ZeroOrMore,
628    'PLUS': lambda n, c: OneOrMore,
629    'DOT': lambda n, c: Dot()
630}
631
632
633# HACK type alias
634util.register_type_alias('Action')
635util.register_type_alias('Expression')
636util.register_type_alias('MatchCallStack')
637util.register_type_alias('DebugCb')
class Node(typing.NamedTuple):
89class Node(typing.NamedTuple):
90    """Abstract syntax tree node.
91
92    Node names are identifiers of parser's definitions and values
93    are other nodes or string values representing matched `Literal`, `Class`
94    or `Dot` leafs.
95
96    """
97    name: str
98    value: typing.List[typing.Union['Node', str]]

Abstract syntax tree node.

Node names are identifiers of parser's definitions and values are other nodes or string values representing matched Literal, Class or Dot leafs.

Node(name: str, value: List[Union[ForwardRef('Node'), str]])

Create new instance of Node(name, value)

name: str

Alias for field number 0

value: List[Union[hat.peg.Node, str]]

Alias for field number 1

Inherited Members
builtins.tuple
index
count
Action: Type[Callable[[hat.peg.Node, List], Any]] = ~Action

Action

def walk_ast( node: hat.peg.Node, actions: Dict[str, Callable[[hat.peg.Node, List], Any]], default_action: Optional[Callable[[hat.peg.Node, List], Any]] = None) -> Any:
105def walk_ast(node: Node,
106             actions: typing.Dict[str, Action],
107             default_action: typing.Optional[Action] = None
108             ) -> typing.Any:
109    """Simple depth-first abstract syntax tree parser.
110
111    Actions are key value pairs where keys represent node names and values
112    are callables that should be called for appropriate node. Each callable
113    receives matched node and list of results from recursively applying
114    this function on child nodes. For nodes which name doesn't match any
115    action, default action is used. If default action is not defined and node
116    name doesn't match action, result is None and recursion is stopped.
117
118    """
119    action = actions.get(node.name, default_action)
120    if not action:
121        return
122    children = [walk_ast(i, actions, default_action)
123                if isinstance(i, Node) else i
124                for i in node.value]
125    return action(node, children)

Simple depth-first abstract syntax tree parser.

Actions are key value pairs where keys represent node names and values are callables that should be called for appropriate node. Each callable receives matched node and list of results from recursively applying this function on child nodes. For nodes which name doesn't match any action, default action is used. If default action is not defined and node name doesn't match action, result is None and recursion is stopped.

class Sequence(typing.NamedTuple):
128class Sequence(typing.NamedTuple):
129    expressions: typing.List['Expression']

Sequence(expressions,)

Sequence(expressions: List[ForwardRef('Expression')])

Create new instance of Sequence(expressions,)

expressions: List[~Expression]

Alias for field number 0

Inherited Members
builtins.tuple
index
count
class Choice(typing.NamedTuple):
132class Choice(typing.NamedTuple):
133    expressions: typing.List['Expression']

Choice(expressions,)

Choice(expressions: List[ForwardRef('Expression')])

Create new instance of Choice(expressions,)

expressions: List[~Expression]

Alias for field number 0

Inherited Members
builtins.tuple
index
count
class Not(typing.NamedTuple):
136class Not(typing.NamedTuple):
137    expression: 'Expression'

Not(expression,)

Not(expression: ForwardRef('Expression'))

Create new instance of Not(expression,)

expression: ~Expression

Alias for field number 0

Inherited Members
builtins.tuple
index
count
class And(typing.NamedTuple):
140class And(typing.NamedTuple):
141    expression: 'Expression'

And(expression,)

And(expression: ForwardRef('Expression'))

Create new instance of And(expression,)

expression: ~Expression

Alias for field number 0

Inherited Members
builtins.tuple
index
count
class OneOrMore(typing.NamedTuple):
144class OneOrMore(typing.NamedTuple):
145    expression: 'Expression'

OneOrMore(expression,)

OneOrMore(expression: ForwardRef('Expression'))

Create new instance of OneOrMore(expression,)

expression: ~Expression

Alias for field number 0

Inherited Members
builtins.tuple
index
count
class ZeroOrMore(typing.NamedTuple):
148class ZeroOrMore(typing.NamedTuple):
149    expression: 'Expression'

ZeroOrMore(expression,)

ZeroOrMore(expression: ForwardRef('Expression'))

Create new instance of ZeroOrMore(expression,)

expression: ~Expression

Alias for field number 0

Inherited Members
builtins.tuple
index
count
class Optional(typing.NamedTuple):
152class Optional(typing.NamedTuple):
153    expression: 'Expression'

Optional(expression,)

Optional(expression: ForwardRef('Expression'))

Create new instance of Optional(expression,)

expression: ~Expression

Alias for field number 0

Inherited Members
builtins.tuple
index
count
class Identifier(typing.NamedTuple):
156class Identifier(typing.NamedTuple):
157    value: str

Identifier(value,)

Identifier(value: str)

Create new instance of Identifier(value,)

value: str

Alias for field number 0

Inherited Members
builtins.tuple
index
count
class Literal(typing.NamedTuple):
160class Literal(typing.NamedTuple):
161    value: str

Literal(value,)

Literal(value: str)

Create new instance of Literal(value,)

value: str

Alias for field number 0

Inherited Members
builtins.tuple
index
count
class Class(typing.NamedTuple):
164class Class(typing.NamedTuple):
165    values: typing.List[typing.Union[str, typing.Tuple[str, str]]]

Class(values,)

Class(values: List[Union[str, Tuple[str, str]]])

Create new instance of Class(values,)

values: List[Union[str, Tuple[str, str]]]

Alias for field number 0

Inherited Members
builtins.tuple
index
count
class Dot(typing.NamedTuple):
168class Dot(typing.NamedTuple):
169    pass

Dot()

Dot()

Create new instance of Dot()

Inherited Members
builtins.tuple
index
count

Expression

class MatchResult(typing.NamedTuple):
177class MatchResult(typing.NamedTuple):
178    """Match result
179
180    Args:
181        node: matched node or ``None`` if match failed
182        rest: remaining input data
183
184    """
185    node: typing.Optional[Node]
186    rest: str

Match result

Args
  • node: matched node or None if match failed
  • rest: remaining input data
MatchResult(node: Optional[hat.peg.Node], rest: str)

Create new instance of MatchResult(node, rest)

node: Optional[hat.peg.Node]

Alias for field number 0

rest: str

Alias for field number 1

Inherited Members
builtins.tuple
index
count
class MatchCallFrame(typing.NamedTuple):
189class MatchCallFrame(typing.NamedTuple):
190    """Match call frame
191
192    Args:
193        name: definition name
194        data: input data
195
196    """
197    name: str
198    data: str

Match call frame

Args
  • name: definition name
  • data: input data
MatchCallFrame(name: str, data: str)

Create new instance of MatchCallFrame(name, data)

name: str

Alias for field number 0

data: str

Alias for field number 1

Inherited Members
builtins.tuple
index
count
MatchCallStack: Type[Iterable[hat.peg.MatchCallFrame]] = ~MatchCallStack

Match call stack

DebugCb: Type[Callable[[hat.peg.MatchResult, Iterable[hat.peg.MatchCallFrame]], NoneType]] = ~DebugCb

Debug callback

class Grammar:
209class Grammar:
210    """PEG Grammar.
211
212    Args:
213        definitions: grammar definitions
214        starting: starting definition name
215
216    """
217
218    def __init__(self,
219                 definitions: typing.Union[str,
220                                           typing.Dict[str, Expression]],
221                 starting: str):
222        if isinstance(definitions, str):
223            ast = _peg_grammar.parse(definitions)
224            definitions = walk_ast(ast, _peg_actions)
225            definitions = {k: _reduce_expression(v)
226                           for k, v in definitions.items()}
227        self._definitions = definitions
228        self._starting = starting
229
230    @property
231    def definitions(self) -> typing.Dict[str, Expression]:
232        """Definitions"""
233        return self._definitions
234
235    @property
236    def starting(self) -> str:
237        """Starting definition name"""
238        return self._starting
239
240    def parse(self,
241              data: str,
242              debug_cb: typing.Optional[DebugCb] = None
243              ) -> Node:
244        """Parse input data.
245
246        `debug_cb` is optional function which can be used for monitoring and
247        debugging parse steps. It is called each time named definition
248        is successfully or unsuccessfully matched. This function receives
249        match result and match call stack.
250
251        """
252        state = _State(grammar=self,
253                       data=data,
254                       call_stack=_MatchCallStack(None, None),
255                       debug_cb=debug_cb)
256        result = _match(state, self._starting)
257        if result.node is None:
258            raise Exception("could not match starting definition")
259        x = _reduce_node(result.node)
260        return x

PEG Grammar.

Args
  • definitions: grammar definitions
  • starting: starting definition name
Grammar( definitions: Union[str, Dict[str, Union[hat.peg.Sequence, hat.peg.Choice, hat.peg.Not, hat.peg.And, hat.peg.OneOrMore, hat.peg.ZeroOrMore, hat.peg.Optional, hat.peg.Identifier, hat.peg.Literal, hat.peg.Class, hat.peg.Dot]]], starting: str)
218    def __init__(self,
219                 definitions: typing.Union[str,
220                                           typing.Dict[str, Expression]],
221                 starting: str):
222        if isinstance(definitions, str):
223            ast = _peg_grammar.parse(definitions)
224            definitions = walk_ast(ast, _peg_actions)
225            definitions = {k: _reduce_expression(v)
226                           for k, v in definitions.items()}
227        self._definitions = definitions
228        self._starting = starting
starting: str

Starting definition name

def parse( self, data: str, debug_cb: Optional[Callable[[hat.peg.MatchResult, Iterable[hat.peg.MatchCallFrame]], NoneType]] = None) -> hat.peg.Node:
240    def parse(self,
241              data: str,
242              debug_cb: typing.Optional[DebugCb] = None
243              ) -> Node:
244        """Parse input data.
245
246        `debug_cb` is optional function which can be used for monitoring and
247        debugging parse steps. It is called each time named definition
248        is successfully or unsuccessfully matched. This function receives
249        match result and match call stack.
250
251        """
252        state = _State(grammar=self,
253                       data=data,
254                       call_stack=_MatchCallStack(None, None),
255                       debug_cb=debug_cb)
256        result = _match(state, self._starting)
257        if result.node is None:
258            raise Exception("could not match starting definition")
259        x = _reduce_node(result.node)
260        return x

Parse input data.

debug_cb is optional function which can be used for monitoring and debugging parse steps. It is called each time named definition is successfully or unsuccessfully matched. This function receives match result and match call stack.

def console_debug_cb( result: hat.peg.MatchResult, call_stack: Iterable[hat.peg.MatchCallFrame])
263def console_debug_cb(result: MatchResult, call_stack: MatchCallStack):
264    """Simple console debugger."""
265    success = '+++' if result.node else '---'
266    stack = ', '.join(frame.name for frame in call_stack)
267    consumed = util.first(call_stack).data[:-len(result.rest)]
268    print(success, stack)
269    print('<<<', consumed)
270    print('>>>', result.rest, flush=True)

Simple console debugger.