hat.peg

PEG Parser

Implementation of PEG parser as described in paper "Parsing Expression Grammars: A Recognition-Based Syntactic Foundation" (Bryan Ford, 2004). PEG's grammar itself can be defined by PEG grammar::

# Hierarchical syntax
Grammar    <- Spacing Definition+ EndOfFile
Definition <- Identifier LEFTARROW Expression
Expression <- Sequence (SLASH Sequence)*
Sequence   <- Prefix*
Prefix     <- (AND / NOT)? Suffix
Suffix     <- Primary (QUESTION / STAR / PLUS)?
Primary    <- Identifier !LEFTARROW
            / OPEN Expression CLOSE
            / Literal
            / Class
            / DOT

# Lexical syntax
Identifier <- IdentStart IdentCont* Spacing
IdentStart <- [a-zA-Z_]
IdentCont  <- IdentStart / [0-9]
Literal    <- ['] (!['] Char)* ['] Spacing
            / ["] (!["] Char)* ["] Spacing
Class      <- '[' (!']' Range)* ']' Spacing
Range      <- Char '-' Char / Char
Char       <- '\\' [nrt'"\[\]\\]
            / '\\' 'x' Hex Hex
            / '\\' 'u' Hex Hex Hex Hex
            / !'\\' .
Hex        <- [0-9a-fA-F]

LEFTARROW  <- '<-' Spacing
SLASH      <- '/' Spacing
AND        <- '&' Spacing
NOT        <- '!' Spacing
QUESTION   <- '?' Spacing
STAR       <- '*' Spacing
PLUS       <- '+' Spacing
OPEN       <- '(' Spacing
CLOSE      <- ')' Spacing
DOT        <- '.' Spacing

Spacing    <- (Space / Comment)*
Comment    <- '#' (!EndOfLine .)* EndOfLine
Space      <- ' ' / '\t' / EndOfLine
EndOfLine  <- '\r\n' / '\n' / '\r'
EndOfFile  <- !.

Example usage of PEG parser::

import functools
import hat.peg

grammar = hat.peg.Grammar(r'''
    Expr    <- Sum
    Sum     <- Product (('+' / '-') Product)*
    Product <- Value (('*' / '/') Value)*
    Value   <- Spacing ([0-9]+ / '(' Expr ')') Spacing
    Spacing <- ' '*
''', 'Expr')

ast = grammar.parse('1 + 2 * 3 + (4 - 5) * 6 + 7')

result = hat.peg.walk_ast(ast, {
    'Expr': lambda n, c: c[0],
    'Sum': lambda n, c: functools.reduce(
        (lambda acc, x: acc + x[1] if x[0] == '+' else acc - x[1]),
        zip(c[1::2], c[2::2]),
        c[0]),
    'Product': lambda n, c: functools.reduce(
        (lambda acc, x: acc * x[1] if x[0] == '*' else acc / x[1]),
        zip(c[1::2], c[2::2]),
        c[0]),
    'Value': lambda n, c: (c[2] if c[1] == '(' else
                           int(''.join(c[1:-1])))})

assert result == 8
  1r"""PEG Parser
  2
  3Implementation of PEG parser as described in paper "Parsing Expression
  4Grammars: A Recognition-Based Syntactic Foundation" (Bryan Ford, 2004).
  5PEG's grammar itself can be defined by PEG grammar::
  6
  7    # Hierarchical syntax
  8    Grammar    <- Spacing Definition+ EndOfFile
  9    Definition <- Identifier LEFTARROW Expression
 10    Expression <- Sequence (SLASH Sequence)*
 11    Sequence   <- Prefix*
 12    Prefix     <- (AND / NOT)? Suffix
 13    Suffix     <- Primary (QUESTION / STAR / PLUS)?
 14    Primary    <- Identifier !LEFTARROW
 15                / OPEN Expression CLOSE
 16                / Literal
 17                / Class
 18                / DOT
 19
 20    # Lexical syntax
 21    Identifier <- IdentStart IdentCont* Spacing
 22    IdentStart <- [a-zA-Z_]
 23    IdentCont  <- IdentStart / [0-9]
 24    Literal    <- ['] (!['] Char)* ['] Spacing
 25                / ["] (!["] Char)* ["] Spacing
 26    Class      <- '[' (!']' Range)* ']' Spacing
 27    Range      <- Char '-' Char / Char
 28    Char       <- '\\' [nrt'"\[\]\\]
 29                / '\\' 'x' Hex Hex
 30                / '\\' 'u' Hex Hex Hex Hex
 31                / !'\\' .
 32    Hex        <- [0-9a-fA-F]
 33
 34    LEFTARROW  <- '<-' Spacing
 35    SLASH      <- '/' Spacing
 36    AND        <- '&' Spacing
 37    NOT        <- '!' Spacing
 38    QUESTION   <- '?' Spacing
 39    STAR       <- '*' Spacing
 40    PLUS       <- '+' Spacing
 41    OPEN       <- '(' Spacing
 42    CLOSE      <- ')' Spacing
 43    DOT        <- '.' Spacing
 44
 45    Spacing    <- (Space / Comment)*
 46    Comment    <- '#' (!EndOfLine .)* EndOfLine
 47    Space      <- ' ' / '\t' / EndOfLine
 48    EndOfLine  <- '\r\n' / '\n' / '\r'
 49    EndOfFile  <- !.
 50
 51Example usage of PEG parser::
 52
 53    import functools
 54    import hat.peg
 55
 56    grammar = hat.peg.Grammar(r'''
 57        Expr    <- Sum
 58        Sum     <- Product (('+' / '-') Product)*
 59        Product <- Value (('*' / '/') Value)*
 60        Value   <- Spacing ([0-9]+ / '(' Expr ')') Spacing
 61        Spacing <- ' '*
 62    ''', 'Expr')
 63
 64    ast = grammar.parse('1 + 2 * 3 + (4 - 5) * 6 + 7')
 65
 66    result = hat.peg.walk_ast(ast, {
 67        'Expr': lambda n, c: c[0],
 68        'Sum': lambda n, c: functools.reduce(
 69            (lambda acc, x: acc + x[1] if x[0] == '+' else acc - x[1]),
 70            zip(c[1::2], c[2::2]),
 71            c[0]),
 72        'Product': lambda n, c: functools.reduce(
 73            (lambda acc, x: acc * x[1] if x[0] == '*' else acc / x[1]),
 74            zip(c[1::2], c[2::2]),
 75            c[0]),
 76        'Value': lambda n, c: (c[2] if c[1] == '(' else
 77                               int(''.join(c[1:-1])))})
 78
 79    assert result == 8
 80
 81"""
 82
 83import typing
 84
 85from hat import util
 86
 87
 88class Node(typing.NamedTuple):
 89    """Abstract syntax tree node.
 90
 91    Node names are identifiers of parser's definitions and values
 92    are other nodes or string values representing matched `Literal`, `Class`
 93    or `Dot` leafs.
 94
 95    """
 96    name: str
 97    value: list[typing.Union['Node', str]]
 98
 99
100Action: typing.TypeAlias = typing.Callable[[Node, list], typing.Any]
101"""Action"""
102
103
104def walk_ast(node: Node,
105             actions: dict[str, Action],
106             default_action: Action | None = None
107             ) -> typing.Any:
108    """Simple depth-first abstract syntax tree parser.
109
110    Actions are key value pairs where keys represent node names and values
111    are callables that should be called for appropriate node. Each callable
112    receives matched node and list of results from recursively applying
113    this function on child nodes. For nodes which name doesn't match any
114    action, default action is used. If default action is not defined and node
115    name doesn't match action, result is None and recursion is stopped.
116
117    """
118    action = actions.get(node.name, default_action)
119    if not action:
120        return
121
122    children = [walk_ast(i, actions, default_action)
123                if isinstance(i, Node) else i
124                for i in node.value]
125
126    return action(node, children)
127
128
129class Sequence(typing.NamedTuple):
130    expressions: typing.List['Expression']
131
132
133class Choice(typing.NamedTuple):
134    expressions: typing.List['Expression']
135
136
137class Not(typing.NamedTuple):
138    expression: 'Expression'
139
140
141class And(typing.NamedTuple):
142    expression: 'Expression'
143
144
145class OneOrMore(typing.NamedTuple):
146    expression: 'Expression'
147
148
149class ZeroOrMore(typing.NamedTuple):
150    expression: 'Expression'
151
152
153class Optional(typing.NamedTuple):
154    expression: 'Expression'
155
156
157class Identifier(typing.NamedTuple):
158    value: str
159
160
161class Literal(typing.NamedTuple):
162    value: str
163
164
165class Class(typing.NamedTuple):
166    values: list[str | tuple[str, str]]
167
168
169class Dot(typing.NamedTuple):
170    pass
171
172
173Expression: typing.TypeAlias = (Sequence | Choice | Not | And | OneOrMore |
174                                ZeroOrMore | Optional | Identifier | Literal |
175                                Class | Dot)
176"""Expression"""
177
178
179class MatchResult(typing.NamedTuple):
180    """Match result
181
182    Args:
183        node: matched node or ``None`` if match failed
184        rest: remaining input data
185
186    """
187    node: Node | None
188    rest: str
189
190
191class MatchCallFrame(typing.NamedTuple):
192    """Match call frame
193
194    Args:
195        name: definition name
196        data: input data
197
198    """
199    name: str
200    data: str
201
202
203MatchCallStack: typing.TypeAlias = typing.Iterable[MatchCallFrame]
204"""Match call stack"""
205
206
207DebugCb: typing.TypeAlias = typing.Callable[[MatchResult, MatchCallStack],
208                                            None]
209"""Debug callback"""
210
211
212class Grammar:
213    """PEG Grammar.
214
215    Args:
216        definitions: grammar definitions
217        starting: starting definition name
218
219    """
220
221    def __init__(self,
222                 definitions: str | dict[str, Expression],
223                 starting: str):
224        if isinstance(definitions, str):
225            ast = _peg_grammar.parse(definitions)
226            definitions = walk_ast(ast, _peg_actions)
227            definitions = {k: _reduce_expression(v)
228                           for k, v in definitions.items()}
229        self._definitions = definitions
230        self._starting = starting
231
232    @property
233    def definitions(self) -> dict[str, Expression]:
234        """Definitions"""
235        return self._definitions
236
237    @property
238    def starting(self) -> str:
239        """Starting definition name"""
240        return self._starting
241
242    def parse(self,
243              data: str,
244              debug_cb: DebugCb | None = None
245              ) -> Node:
246        """Parse input data.
247
248        `debug_cb` is optional function which can be used for monitoring and
249        debugging parse steps. It is called each time named definition
250        is successfully or unsuccessfully matched. This function receives
251        match result and match call stack.
252
253        """
254        state = _State(grammar=self,
255                       data=data,
256                       call_stack=_MatchCallStack(None, None),
257                       debug_cb=debug_cb)
258        result = _match(state, self._starting)
259        if result.node is None:
260            raise Exception("could not match starting definition")
261        x = _reduce_node(result.node)
262        return x
263
264
265def console_debug_cb(result: MatchResult, call_stack: MatchCallStack):
266    """Simple console debugger."""
267    success = '+++' if result.node else '---'
268    stack = ', '.join(frame.name for frame in call_stack)
269    consumed = util.first(call_stack).data[:-len(result.rest)]
270    print(success, stack)
271    print('<<<', consumed)
272    print('>>>', result.rest, flush=True)
273
274
275class _MatchCallStack(typing.NamedTuple):
276    frame: MatchCallFrame | None
277    previous: typing.Optional['_MatchCallStack']
278
279    def __iter__(self):
280        current = self
281        while current and current.frame:
282            yield current.frame
283            current = current.previous
284
285
286class _State(typing.NamedTuple):
287    grammar: Grammar
288    data: str
289    call_stack: _MatchCallStack
290    debug_cb: DebugCb
291
292
293def _match(state, name):
294    call_frame = MatchCallFrame(name, state.data)
295    if call_frame in state.call_stack:
296        raise Exception('Infinite matching recursion detected')
297    state = state._replace(
298        call_stack=_MatchCallStack(call_frame, state.call_stack))
299    result = _match_expression(state, state.grammar.definitions[name])
300    if result.node:
301        result = result._replace(
302            node=result.node._replace(name=name))
303    if state.debug_cb:
304        debug_result = MatchResult(
305            node=(_reduce_node(result.node) if result.node else None),
306            rest=result.rest)
307        state.debug_cb(debug_result, state.call_stack)
308    return result
309
310
311def _match_expression(state, expression):
312    t = type(expression)
313    if t == Choice:
314        return _match_choice(state, expression)
315    if t == Sequence:
316        return _match_sequence(state, expression)
317    if t == Not:
318        return _match_not(state, expression)
319    if t == And:
320        return _match_and(state, expression)
321    if t == OneOrMore:
322        return _match_oneormore(state, expression)
323    if t == ZeroOrMore:
324        return _match_zeroormore(state, expression)
325    if t == Optional:
326        return _match_optional(state, expression)
327    if t == Identifier:
328        return _match_identifier(state, expression)
329    if t == Literal:
330        return _match_literal(state, expression)
331    if t == Class:
332        return _match_class(state, expression)
333    if t == Dot:
334        return _match_dot(state, expression)
335    raise ValueError('unsupported expression type')
336
337
338def _match_choice(state, expression):
339    for i in expression.expressions:
340        result = _match_expression(state, i)
341        if result.node:
342            return result
343    return MatchResult(None, state.data)
344
345
346def _match_sequence(state, expression):
347    nodes = []
348    for i in expression.expressions:
349        result = _match_expression(state, i)
350        if not result.node:
351            return MatchResult(None, state.data)
352        nodes.append(result.node)
353        state = state._replace(data=result.rest)
354    return MatchResult(Node(None, nodes), state.data)
355
356
357def _match_not(state, expression):
358    result = _match_expression(state, expression.expression)
359    return (MatchResult(None, state.data) if result.node
360            else MatchResult(Node(None, []), state.data))
361
362
363def _match_and(state, expression):
364    node = _match_expression(state, expression.expression).node
365    return MatchResult(Node(None, []) if node else None, state.data)
366
367
368def _match_oneormore(state, expression):
369    return _match_expression(state, Sequence([
370        expression.expression,
371        ZeroOrMore(expression.expression)]))
372
373
374def _match_zeroormore(state, expression):
375    nodes = []
376    while True:
377        result = _match_expression(state, expression.expression)
378        if not result.node:
379            return MatchResult(Node(None, nodes), state.data)
380        nodes.append(result.node)
381        state = state._replace(data=result.rest)
382
383
384def _match_optional(state, expression):
385    result = _match_expression(state, expression.expression)
386    if result.node:
387        return result
388    return MatchResult(Node(None, []), state.data)
389
390
391def _match_identifier(state, expression):
392    result = _match(state, expression.value)
393    if result.node:
394        result = result._replace(node=Node(None, [result.node]))
395    return result
396
397
398def _match_literal(state, expression):
399    data = state.data[:len(expression.value)]
400    if data != expression.value:
401        return MatchResult(None, state.data)
402    rest = state.data[len(expression.value):]
403    return MatchResult(Node(None, [data]), rest)
404
405
406def _match_class(state, expression):
407    if state.data:
408        for i in expression.values:
409            if isinstance(i, str):
410                matched = state.data[0] == i
411            else:
412                matched = i[0] <= state.data[0] <= i[1]
413            if matched:
414                return MatchResult(Node(None, [state.data[:1]]),
415                                   state.data[1:])
416    return MatchResult(None, state.data)
417
418
419def _match_dot(state, expression):
420    if not state.data:
421        return MatchResult(None, state.data)
422    return MatchResult(Node(None, [state.data[:1]]), state.data[1:])
423
424
425def _reduce_expression(expr):
426    if hasattr(expr, 'expressions'):
427        if len(expr.expressions) == 1:
428            return _reduce_expression(expr.expressions[0])
429        return expr._replace(expressions=[_reduce_expression(i)
430                                          for i in expr.expressions])
431    if hasattr(expr, 'expression'):
432        return expr._replace(expression=_reduce_expression(expr.expression))
433    return expr
434
435
436def _reduce_node(node):
437    return node._replace(value=list(_reduce_node_list(node.value)))
438
439
440def _reduce_node_list(nodes):
441    for node in nodes:
442        if not isinstance(node, Node):
443            yield node
444        elif node.name is None:
445            yield from _reduce_node_list(node.value)
446        else:
447            yield _reduce_node(node)
448
449
450_peg_grammar = Grammar({
451    'Grammar': Sequence([
452        Identifier('Spacing'),
453        OneOrMore(Identifier('Definition')),
454        Identifier('EndOfFile')]),
455    'Definition': Sequence([
456        Identifier('Identifier'),
457        Identifier('LEFTARROW'),
458        Identifier('Expression')]),
459    'Expression': Sequence([
460        Identifier('Sequence'),
461        ZeroOrMore(Sequence([
462            Identifier('SLASH'),
463            Identifier('Sequence')]))]),
464    'Sequence': ZeroOrMore(Identifier('Prefix')),
465    'Prefix': Sequence([
466        Optional(Choice([
467            Identifier('AND'),
468            Identifier('NOT')])),
469        Identifier('Suffix')]),
470    'Suffix': Sequence([
471        Identifier('Primary'),
472        Optional(Choice([
473            Identifier('QUESTION'),
474            Identifier('STAR'),
475            Identifier('PLUS')]))]),
476    'Primary': Choice([
477        Sequence([
478            Identifier('Identifier'),
479            Not(Identifier('LEFTARROW'))]),
480        Sequence([
481            Identifier('OPEN'),
482            Identifier('Expression'),
483            Identifier('CLOSE')]),
484        Identifier('Literal'),
485        Identifier('Class'),
486        Identifier('DOT')]),
487    'Identifier': Sequence([
488        Identifier('IdentStart'),
489        ZeroOrMore(Identifier('IdentCont')),
490        Identifier('Spacing')]),
491    'IdentStart': Class([('a', 'z'),
492                         ('A', 'Z'),
493                         '_']),
494    'IdentCont': Choice([
495        Identifier('IdentStart'),
496        Class([('0', '9')])]),
497    'Literal': Choice([
498        Sequence([
499            Class(["'"]),
500            ZeroOrMore(Sequence([
501                Not(Class(["'"])),
502                Identifier('Char')])),
503            Class(["'"]),
504            Identifier('Spacing')]),
505        Sequence([
506            Class(['"']),
507            ZeroOrMore(Sequence([
508                Not(Class(['"'])),
509                Identifier('Char')])),
510            Class(['"']),
511            Identifier('Spacing')])]),
512    'Class': Sequence([
513        Literal('['),
514        ZeroOrMore(Sequence([
515            Not(Literal(']')),
516            Identifier('Range')])),
517        Literal(']'),
518        Identifier('Spacing')]),
519    'Range': Choice([
520        Sequence([
521            Identifier('Char'),
522            Literal('-'),
523            Identifier('Char')]),
524        Identifier('Char')]),
525    'Char': Choice([
526        Sequence([
527            Literal('\\'),
528            Class(['n', 'r', 't', "'", '"', '[', ']', '\\'])]),
529        Sequence([
530            Literal('\\'),
531            Literal('x'),
532            Identifier('Hex'),
533            Identifier('Hex')]),
534        Sequence([
535            Literal('\\'),
536            Literal('u'),
537            Identifier('Hex'),
538            Identifier('Hex'),
539            Identifier('Hex'),
540            Identifier('Hex')]),
541        Sequence([
542            Not(Literal('\\')),
543            Dot()])]),
544    'Hex': Class([('0', '9'),
545                  ('a', 'f'),
546                  ('A', 'F')]),
547    'LEFTARROW': Sequence([
548        Literal('<-'),
549        Identifier('Spacing')]),
550    'SLASH': Sequence([
551        Literal('/'),
552        Identifier('Spacing')]),
553    'AND': Sequence([
554        Literal('&'),
555        Identifier('Spacing')]),
556    'NOT': Sequence([
557        Literal('!'),
558        Identifier('Spacing')]),
559    'QUESTION': Sequence([
560        Literal('?'),
561        Identifier('Spacing')]),
562    'STAR': Sequence([
563        Literal('*'),
564        Identifier('Spacing')]),
565    'PLUS': Sequence([
566        Literal('+'),
567        Identifier('Spacing')]),
568    'OPEN': Sequence([
569        Literal('('),
570        Identifier('Spacing')]),
571    'CLOSE': Sequence([
572        Literal(')'),
573        Identifier('Spacing')]),
574    'DOT': Sequence([
575        Literal('.'),
576        Identifier('Spacing')]),
577    'Spacing': ZeroOrMore(Choice([
578        Identifier('Space'),
579        Identifier('Comment')])),
580    'Comment': Sequence([
581        Literal('#'),
582        ZeroOrMore(Sequence([
583            Not(Identifier('EndOfLine')),
584            Dot()])),
585        Identifier('EndOfLine')]),
586    'Space': Choice([
587        Literal(' '),
588        Literal('\t'),
589        Identifier('EndOfLine')]),
590    'EndOfLine': Choice([
591        Literal('\r\n'),
592        Literal('\n'),
593        Literal('\r')]),
594    'EndOfFile': Not(Dot())
595}, 'Grammar')
596
597
598_peg_actions = {
599    'Grammar': lambda n, c: {k: v for k, v in filter(bool, c)},
600    'Definition': lambda n, c: (c[0].value, c[2]),
601    'Expression': lambda n, c: Choice(list(c[::2])),
602    'Sequence': lambda n, c: Sequence(c),
603    'Prefix': lambda n, c: (c[0] if len(c) == 1 else
604                            c[0](c[1])),
605    'Suffix': lambda n, c: (c[0] if len(c) == 1 else
606                            c[1](c[0])),
607    'Primary': lambda n, c: (c[1] if c[0] is None else
608                             c[0]),
609    'Identifier': lambda n, c: Identifier(''.join(c[:-1])),
610    'IdentStart': lambda n, c: c[0],
611    'IdentCont': lambda n, c: c[0],
612    'Literal': lambda n, c: Literal(''.join(c[1:-2])),
613    'Class': lambda n, c: Class(c[1:-2]),
614    'Range': lambda n, c: (c[0] if len(c) == 1 else
615                           (c[0], c[2])),
616    'Char': lambda n, c: (c[0] if c[0] != '\\' else
617                          '\n' if c[1] == 'n' else
618                          '\r' if c[1] == 'r' else
619                          '\t' if c[1] == 't' else
620                          "'" if c[1] == "'" else
621                          '"' if c[1] == '"' else
622                          '[' if c[1] == '[' else
623                          ']' if c[1] == ']' else
624                          '\\' if c[1] == '\\' else
625                          chr(int(c[2:], 16))),
626    'Hex': lambda n, c: c[0],
627    'AND': lambda n, c: And,
628    'NOT': lambda n, c: Not,
629    'QUESTION': lambda n, c: Optional,
630    'STAR': lambda n, c: ZeroOrMore,
631    'PLUS': lambda n, c: OneOrMore,
632    'DOT': lambda n, c: Dot()}
class Node(typing.NamedTuple):
89class Node(typing.NamedTuple):
90    """Abstract syntax tree node.
91
92    Node names are identifiers of parser's definitions and values
93    are other nodes or string values representing matched `Literal`, `Class`
94    or `Dot` leafs.
95
96    """
97    name: str
98    value: list[typing.Union['Node', str]]

Abstract syntax tree node.

Node names are identifiers of parser's definitions and values are other nodes or string values representing matched Literal, Class or Dot leafs.

Node(name: str, value: list[typing.Union[ForwardRef('Node'), str]])

Create new instance of Node(name, value)

name: str

Alias for field number 0

value: list[typing.Union[Node, str]]

Alias for field number 1

Action: TypeAlias = Callable[[Node, list], Any]

Action

def walk_ast( node: Node, actions: dict[str, typing.Callable[[Node, list], typing.Any]], default_action: Optional[Callable[[Node, list], Any]] = None) -> Any:
105def walk_ast(node: Node,
106             actions: dict[str, Action],
107             default_action: Action | None = None
108             ) -> typing.Any:
109    """Simple depth-first abstract syntax tree parser.
110
111    Actions are key value pairs where keys represent node names and values
112    are callables that should be called for appropriate node. Each callable
113    receives matched node and list of results from recursively applying
114    this function on child nodes. For nodes which name doesn't match any
115    action, default action is used. If default action is not defined and node
116    name doesn't match action, result is None and recursion is stopped.
117
118    """
119    action = actions.get(node.name, default_action)
120    if not action:
121        return
122
123    children = [walk_ast(i, actions, default_action)
124                if isinstance(i, Node) else i
125                for i in node.value]
126
127    return action(node, children)

Simple depth-first abstract syntax tree parser.

Actions are key value pairs where keys represent node names and values are callables that should be called for appropriate node. Each callable receives matched node and list of results from recursively applying this function on child nodes. For nodes which name doesn't match any action, default action is used. If default action is not defined and node name doesn't match action, result is None and recursion is stopped.

class Sequence(typing.NamedTuple):
130class Sequence(typing.NamedTuple):
131    expressions: typing.List['Expression']

Sequence(expressions,)

Sequence(expressions: List[ForwardRef('Expression')])

Create new instance of Sequence(expressions,)

expressions: List[Sequence | Choice | Not | And | OneOrMore | ZeroOrMore | Optional | Identifier | Literal | Class | Dot]

Alias for field number 0

class Choice(typing.NamedTuple):
134class Choice(typing.NamedTuple):
135    expressions: typing.List['Expression']

Choice(expressions,)

Choice(expressions: List[ForwardRef('Expression')])

Create new instance of Choice(expressions,)

expressions: List[Sequence | Choice | Not | And | OneOrMore | ZeroOrMore | Optional | Identifier | Literal | Class | Dot]

Alias for field number 0

class Not(typing.NamedTuple):
138class Not(typing.NamedTuple):
139    expression: 'Expression'

Not(expression,)

Not(expression: ForwardRef('Expression'))

Create new instance of Not(expression,)

Alias for field number 0

class And(typing.NamedTuple):
142class And(typing.NamedTuple):
143    expression: 'Expression'

And(expression,)

And(expression: ForwardRef('Expression'))

Create new instance of And(expression,)

Alias for field number 0

class OneOrMore(typing.NamedTuple):
146class OneOrMore(typing.NamedTuple):
147    expression: 'Expression'

OneOrMore(expression,)

OneOrMore(expression: ForwardRef('Expression'))

Create new instance of OneOrMore(expression,)

Alias for field number 0

class ZeroOrMore(typing.NamedTuple):
150class ZeroOrMore(typing.NamedTuple):
151    expression: 'Expression'

ZeroOrMore(expression,)

ZeroOrMore(expression: ForwardRef('Expression'))

Create new instance of ZeroOrMore(expression,)

Alias for field number 0

class Optional(typing.NamedTuple):
154class Optional(typing.NamedTuple):
155    expression: 'Expression'

Optional(expression,)

Optional(expression: ForwardRef('Expression'))

Create new instance of Optional(expression,)

Alias for field number 0

class Identifier(typing.NamedTuple):
158class Identifier(typing.NamedTuple):
159    value: str

Identifier(value,)

Identifier(value: str)

Create new instance of Identifier(value,)

value: str

Alias for field number 0

class Literal(typing.NamedTuple):
162class Literal(typing.NamedTuple):
163    value: str

Literal(value,)

Literal(value: str)

Create new instance of Literal(value,)

value: str

Alias for field number 0

class Class(typing.NamedTuple):
166class Class(typing.NamedTuple):
167    values: list[str | tuple[str, str]]

Class(values,)

Class(values: list[str | tuple[str, str]])

Create new instance of Class(values,)

values: list[str | tuple[str, str]]

Alias for field number 0

class Dot(typing.NamedTuple):
170class Dot(typing.NamedTuple):
171    pass

Dot()

Dot()

Create new instance of Dot()

Expression: TypeAlias = Sequence | Choice | Not | And | OneOrMore | ZeroOrMore | Optional | Identifier | Literal | Class | Dot

Expression

class MatchResult(typing.NamedTuple):
180class MatchResult(typing.NamedTuple):
181    """Match result
182
183    Args:
184        node: matched node or ``None`` if match failed
185        rest: remaining input data
186
187    """
188    node: Node | None
189    rest: str

Match result

Arguments:
  • node: matched node or None if match failed
  • rest: remaining input data
MatchResult(node: Node | None, rest: str)

Create new instance of MatchResult(node, rest)

node: Node | None

Alias for field number 0

rest: str

Alias for field number 1

class MatchCallFrame(typing.NamedTuple):
192class MatchCallFrame(typing.NamedTuple):
193    """Match call frame
194
195    Args:
196        name: definition name
197        data: input data
198
199    """
200    name: str
201    data: str

Match call frame

Arguments:
  • name: definition name
  • data: input data
MatchCallFrame(name: str, data: str)

Create new instance of MatchCallFrame(name, data)

name: str

Alias for field number 0

data: str

Alias for field number 1

MatchCallStack: TypeAlias = Iterable[MatchCallFrame]

Match call stack

DebugCb: TypeAlias = Callable[[MatchResult, Iterable[MatchCallFrame]], NoneType]

Debug callback

class Grammar:
213class Grammar:
214    """PEG Grammar.
215
216    Args:
217        definitions: grammar definitions
218        starting: starting definition name
219
220    """
221
222    def __init__(self,
223                 definitions: str | dict[str, Expression],
224                 starting: str):
225        if isinstance(definitions, str):
226            ast = _peg_grammar.parse(definitions)
227            definitions = walk_ast(ast, _peg_actions)
228            definitions = {k: _reduce_expression(v)
229                           for k, v in definitions.items()}
230        self._definitions = definitions
231        self._starting = starting
232
233    @property
234    def definitions(self) -> dict[str, Expression]:
235        """Definitions"""
236        return self._definitions
237
238    @property
239    def starting(self) -> str:
240        """Starting definition name"""
241        return self._starting
242
243    def parse(self,
244              data: str,
245              debug_cb: DebugCb | None = None
246              ) -> Node:
247        """Parse input data.
248
249        `debug_cb` is optional function which can be used for monitoring and
250        debugging parse steps. It is called each time named definition
251        is successfully or unsuccessfully matched. This function receives
252        match result and match call stack.
253
254        """
255        state = _State(grammar=self,
256                       data=data,
257                       call_stack=_MatchCallStack(None, None),
258                       debug_cb=debug_cb)
259        result = _match(state, self._starting)
260        if result.node is None:
261            raise Exception("could not match starting definition")
262        x = _reduce_node(result.node)
263        return x

PEG Grammar.

Arguments:
  • definitions: grammar definitions
  • starting: starting definition name
Grammar( definitions: str | dict[str, Sequence | Choice | Not | And | OneOrMore | ZeroOrMore | Optional | Identifier | Literal | Class | Dot], starting: str)
222    def __init__(self,
223                 definitions: str | dict[str, Expression],
224                 starting: str):
225        if isinstance(definitions, str):
226            ast = _peg_grammar.parse(definitions)
227            definitions = walk_ast(ast, _peg_actions)
228            definitions = {k: _reduce_expression(v)
229                           for k, v in definitions.items()}
230        self._definitions = definitions
231        self._starting = starting
definitions: dict[str, Sequence | Choice | Not | And | OneOrMore | ZeroOrMore | Optional | Identifier | Literal | Class | Dot]
233    @property
234    def definitions(self) -> dict[str, Expression]:
235        """Definitions"""
236        return self._definitions

Definitions

starting: str
238    @property
239    def starting(self) -> str:
240        """Starting definition name"""
241        return self._starting

Starting definition name

def parse( self, data: str, debug_cb: Optional[Callable[[MatchResult, Iterable[MatchCallFrame]], NoneType]] = None) -> Node:
243    def parse(self,
244              data: str,
245              debug_cb: DebugCb | None = None
246              ) -> Node:
247        """Parse input data.
248
249        `debug_cb` is optional function which can be used for monitoring and
250        debugging parse steps. It is called each time named definition
251        is successfully or unsuccessfully matched. This function receives
252        match result and match call stack.
253
254        """
255        state = _State(grammar=self,
256                       data=data,
257                       call_stack=_MatchCallStack(None, None),
258                       debug_cb=debug_cb)
259        result = _match(state, self._starting)
260        if result.node is None:
261            raise Exception("could not match starting definition")
262        x = _reduce_node(result.node)
263        return x

Parse input data.

debug_cb is optional function which can be used for monitoring and debugging parse steps. It is called each time named definition is successfully or unsuccessfully matched. This function receives match result and match call stack.

def console_debug_cb( result: MatchResult, call_stack: Iterable[MatchCallFrame]):
266def console_debug_cb(result: MatchResult, call_stack: MatchCallStack):
267    """Simple console debugger."""
268    success = '+++' if result.node else '---'
269    stack = ', '.join(frame.name for frame in call_stack)
270    consumed = util.first(call_stack).data[:-len(result.rest)]
271    print(success, stack)
272    print('<<<', consumed)
273    print('>>>', result.rest, flush=True)

Simple console debugger.