# YAML parser: converts a token stream into a stream of parsing events.
# The following YAML grammar is LL(1) and is parsed by a recursive descent
# parser.
#
# stream            ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
# block_node_or_indentless_sequence ::=
#                       ALIAS
#                       | properties (block_content | indentless_sequence)?
#                       | block_content
#                       | indentless_sequence
# block_node        ::= ALIAS
#                       | properties block_content?
#                       | block_content
# flow_node         ::= ALIAS
#                       | properties flow_content?
#                       | flow_content
# properties        ::= TAG ANCHOR? | ANCHOR TAG?
# block_content     ::= block_collection | flow_collection | SCALAR
# flow_content      ::= flow_collection | SCALAR
# block_collection  ::= block_sequence | block_mapping
# flow_collection   ::= flow_sequence | flow_mapping
# block_sequence    ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
# indentless_sequence   ::= (BLOCK-ENTRY block_node?)+
# block_mapping     ::= BLOCK-MAPPING-START
#                       ((KEY block_node_or_indentless_sequence?)?
#                       (VALUE block_node_or_indentless_sequence?)?)*
#                       BLOCK-END
# flow_sequence     ::= FLOW-SEQUENCE-START
#                       (flow_sequence_entry FLOW-ENTRY)*
#                       flow_sequence_entry?
#                       FLOW-SEQUENCE-END
# flow_sequence_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
# flow_mapping      ::= FLOW-MAPPING-START
#                       (flow_mapping_entry FLOW-ENTRY)*
#                       flow_mapping_entry?
#                       FLOW-MAPPING-END
# flow_mapping_entry    ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# FIRST sets:
#
# stream: { STREAM-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
# implicit_document: FIRST(block_node)
# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_sequence: { BLOCK-SEQUENCE-START }
# block_mapping: { BLOCK-MAPPING-START }
# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_sequence: { BLOCK-ENTRY }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_sequence: { FLOW-SEQUENCE-START }
# flow_mapping: { FLOW-MAPPING-START }
# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
# Public names exported by this module.
__all__ = ['Parser', 'ParserError']

# The star imports below supply the *Token and *Event classes referenced by
# the parser methods in this module.
from .error import MarkedYAMLError
from .tokens import *
from .events import *
from .scanner import *
class ParserError(MarkedYAMLError):
    """Error raised by the parser when the token stream is not valid YAML.

    Raised for an unexpected token, a duplicate or incompatible directive,
    or a tag that uses an undefined tag handle.
    """
class Parser:
    """Turn a stream of YAML tokens into a stream of parsing events.

    The parser is a state machine:

    * ``self.state`` is the bound method that produces the next event
      (``None`` once the stream has ended or after ``dispose()``);
    * ``self.states`` is a stack of states to resume after a nested node has
      been fully parsed;
    * ``self.marks`` is a stack of start marks of the collections currently
      being parsed, used as context in error messages.

    The token helpers ``check_token``, ``peek_token`` and ``get_token`` are
    not defined in this class; they must be supplied by whatever class this
    one is combined with (presumably the scanner -- confirm against caller).
    """
    # Since writing a recursive-descent parser is a straightforward task, we
    # do not give many comments here.

    # Tag handles that are always available, mapped to their prefixes.
    DEFAULT_TAGS = {
        '!':   '!',
        '!!':  'tag:yaml.org,2002:',
    }

    def __init__(self):
        """Initialise an idle parser positioned before the stream start."""
        self.current_event = None   # event produced but not yet consumed
        self.yaml_version = None    # value of the %YAML directive, if any
        self.tag_handles = {}       # handle -> prefix for current document
        self.states = []
        self.marks = []
        self.state = self.parse_stream_start

    def dispose(self):
        """Drop the state references so the parser can be garbage collected."""
        # Reset the state attributes (to clear self-references): the states
        # are bound methods of this very object.
        self.states = []
        self.state = None

    def check_event(self, *choices):
        """Check the type of the next event without consuming it.

        Returns True if an event is available and either no ``choices`` were
        given or the event is an instance of one of them; False otherwise.
        """
        if self.current_event is None and self.state:
            self.current_event = self.state()
        if self.current_event is None:
            return False
        if not choices:
            return True
        # ``choices`` is a tuple of classes, which isinstance accepts as-is.
        return isinstance(self.current_event, choices)

    def peek_event(self):
        """Return the next event without consuming it (None at the end)."""
        if self.current_event is None and self.state:
            self.current_event = self.state()
        return self.current_event

    def get_event(self):
        """Return the next event and advance past it (None at the end)."""
        if self.current_event is None and self.state:
            self.current_event = self.state()
        value = self.current_event
        self.current_event = None
        return value

    # stream    ::= STREAM-START implicit_document? explicit_document* STREAM-END
    # implicit_document ::= block_node DOCUMENT-END*
    # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*

    def parse_stream_start(self):
        """Consume the stream-start token and emit a StreamStartEvent."""
        token = self.get_token()
        event = StreamStartEvent(token.start_mark, token.end_mark,
                                 encoding=token.encoding)

        # Prepare the next state.
        self.state = self.parse_implicit_document_start

        return event

    def parse_implicit_document_start(self):
        """Emit the start of an implicit document, or defer to the explicit
        document parser when a directive, '---' or the stream end follows."""
        if self.check_token(DirectiveToken, DocumentStartToken,
                            StreamEndToken):
            return self.parse_document_start()

        # Copy the defaults so that the class-level dictionary can never be
        # modified through the per-instance attribute.
        self.tag_handles = self.DEFAULT_TAGS.copy()
        token = self.peek_token()
        start_mark = end_mark = token.start_mark
        event = DocumentStartEvent(start_mark, end_mark,
                                   explicit=False)

        # Prepare the next state.
        self.states.append(self.parse_document_end)
        self.state = self.parse_block_node

        return event

    def parse_document_start(self):
        """Emit the start of an explicit document or the end of the stream.

        Raises ParserError if directives are not followed by a document
        start token.
        """
        # Parse any extra document end indicators.
        while self.check_token(DocumentEndToken):
            self.get_token()

        if not self.check_token(StreamEndToken):
            # Parse an explicit document.
            token = self.peek_token()
            start_mark = token.start_mark
            version, tags = self.process_directives()
            if not self.check_token(DocumentStartToken):
                raise ParserError(None, None,
                        "expected '<document start>', but found %r"
                        % self.peek_token().id,
                        self.peek_token().start_mark)
            token = self.get_token()
            end_mark = token.end_mark
            event = DocumentStartEvent(start_mark, end_mark,
                                       explicit=True, version=version,
                                       tags=tags)
            self.states.append(self.parse_document_end)
            self.state = self.parse_document_content
        else:
            # Parse the end of the stream.
            token = self.get_token()
            event = StreamEndEvent(token.start_mark, token.end_mark)
            # Internal invariants: every nested state and mark has been
            # popped by the time the stream ends.
            assert not self.states
            assert not self.marks
            self.state = None
        return event

    def parse_document_end(self):
        """Emit a DocumentEndEvent; it is explicit if a document end token
        is present (and consumed)."""
        token = self.peek_token()
        start_mark = end_mark = token.start_mark
        explicit = False
        if self.check_token(DocumentEndToken):
            token = self.get_token()
            end_mark = token.end_mark
            explicit = True
        event = DocumentEndEvent(start_mark, end_mark,
                                 explicit=explicit)

        # Prepare the next state.
        self.state = self.parse_document_start

        return event

    def parse_document_content(self):
        """Parse the root node of an explicit document; an empty document
        yields an empty scalar."""
        if self.check_token(DirectiveToken,
                            DocumentStartToken, DocumentEndToken,
                            StreamEndToken):
            event = self.process_empty_scalar(self.peek_token().start_mark)
            self.state = self.states.pop()
            return event
        return self.parse_block_node()

    def process_directives(self):
        """Consume the directives preceding a document.

        Resets ``self.yaml_version`` and ``self.tag_handles``, records the
        %YAML and %TAG directives found, and finally adds the default tag
        handles that were not overridden.

        Returns a pair ``(version, tags)`` where ``tags`` is a copy of the
        explicitly declared handles, or None if there were none.

        Raises ParserError on a duplicate %YAML directive, a major version
        other than 1, or a duplicate tag handle.
        """
        self.yaml_version = None
        self.tag_handles = {}
        while self.check_token(DirectiveToken):
            token = self.get_token()
            if token.name == 'YAML':
                if self.yaml_version is not None:
                    raise ParserError(None, None,
                            "found duplicate YAML directive", token.start_mark)
                major, _ = token.value
                if major != 1:
                    raise ParserError(None, None,
                            "found incompatible YAML document (version 1.* is required)",
                            token.start_mark)
                self.yaml_version = token.value
            elif token.name == 'TAG':
                handle, prefix = token.value
                if handle in self.tag_handles:
                    raise ParserError(None, None,
                            "duplicate tag handle %r" % handle,
                            token.start_mark)
                self.tag_handles[handle] = prefix
        # The returned tags are captured before the defaults are merged in,
        # so they contain only what the document declared.
        if self.tag_handles:
            value = self.yaml_version, self.tag_handles.copy()
        else:
            value = self.yaml_version, None
        for handle, prefix in self.DEFAULT_TAGS.items():
            self.tag_handles.setdefault(handle, prefix)
        return value

    # block_node_or_indentless_sequence ::= ALIAS
    #               | properties (block_content | indentless_sequence)?
    #               | block_content
    #               | indentless_sequence
    # block_node    ::= ALIAS
    #                   | properties block_content?
    #                   | block_content
    # flow_node     ::= ALIAS
    #                   | properties flow_content?
    #                   | flow_content
    # properties    ::= TAG ANCHOR? | ANCHOR TAG?
    # block_content     ::= block_collection | flow_collection | SCALAR
    # flow_content      ::= flow_collection | SCALAR
    # block_collection  ::= block_sequence | block_mapping
    # flow_collection   ::= flow_sequence | flow_mapping

    def parse_block_node(self):
        """Parse a node in block context."""
        return self.parse_node(block=True)

    def parse_flow_node(self):
        """Parse a node in flow context."""
        return self.parse_node()

    def parse_block_node_or_indentless_sequence(self):
        """Parse a block node, also accepting an indentless sequence."""
        return self.parse_node(block=True, indentless_sequence=True)

    def parse_node(self, block=False, indentless_sequence=False):
        """Parse one node and return its (first) event.

        ``block`` allows block collections; ``indentless_sequence`` also
        allows a sequence introduced directly by a block entry token.

        Returns an AliasEvent, a ScalarEvent, or the start event of a
        sequence or mapping (the rest is produced by the follow-up states).

        Raises ParserError for an undefined tag handle or when no node
        content is found.
        """
        if self.check_token(AliasToken):
            token = self.get_token()
            event = AliasEvent(token.value, token.start_mark, token.end_mark)
            self.state = self.states.pop()
        else:
            # properties ::= TAG ANCHOR? | ANCHOR TAG?
            anchor = None
            tag = None
            start_mark = end_mark = tag_mark = None
            if self.check_token(AnchorToken):
                token = self.get_token()
                start_mark = token.start_mark
                end_mark = token.end_mark
                anchor = token.value
                if self.check_token(TagToken):
                    token = self.get_token()
                    tag_mark = token.start_mark
                    end_mark = token.end_mark
                    tag = token.value
            elif self.check_token(TagToken):
                token = self.get_token()
                start_mark = tag_mark = token.start_mark
                end_mark = token.end_mark
                tag = token.value
                if self.check_token(AnchorToken):
                    token = self.get_token()
                    end_mark = token.end_mark
                    anchor = token.value
            # Resolve a (handle, suffix) pair into the full tag.
            if tag is not None:
                handle, suffix = tag
                if handle is not None:
                    if handle not in self.tag_handles:
                        raise ParserError("while parsing a node", start_mark,
                                "found undefined tag handle %r" % handle,
                                tag_mark)
                    tag = self.tag_handles[handle]+suffix
                else:
                    tag = suffix
            if start_mark is None:
                # No properties: the node starts at the next token.
                start_mark = end_mark = self.peek_token().start_mark
            event = None
            implicit = (tag is None or tag == '!')
            if indentless_sequence and self.check_token(BlockEntryToken):
                end_mark = self.peek_token().end_mark
                event = SequenceStartEvent(anchor, tag, implicit,
                                           start_mark, end_mark)
                self.state = self.parse_indentless_sequence_entry
            else:
                if self.check_token(ScalarToken):
                    token = self.get_token()
                    end_mark = token.end_mark
                    # For scalars ``implicit`` is a pair of flags chosen
                    # from the scalar being plain and from the tag.
                    if (token.plain and tag is None) or tag == '!':
                        implicit = (True, False)
                    elif tag is None:
                        implicit = (False, True)
                    else:
                        implicit = (False, False)
                    event = ScalarEvent(anchor, tag, implicit, token.value,
                                        start_mark, end_mark,
                                        style=token.style)
                    self.state = self.states.pop()
                elif self.check_token(FlowSequenceStartToken):
                    end_mark = self.peek_token().end_mark
                    event = SequenceStartEvent(anchor, tag, implicit,
                                               start_mark, end_mark,
                                               flow_style=True)
                    self.state = self.parse_flow_sequence_first_entry
                elif self.check_token(FlowMappingStartToken):
                    end_mark = self.peek_token().end_mark
                    event = MappingStartEvent(anchor, tag, implicit,
                                              start_mark, end_mark,
                                              flow_style=True)
                    self.state = self.parse_flow_mapping_first_key
                elif block and self.check_token(BlockSequenceStartToken):
                    end_mark = self.peek_token().start_mark
                    event = SequenceStartEvent(anchor, tag, implicit,
                                               start_mark, end_mark,
                                               flow_style=False)
                    self.state = self.parse_block_sequence_first_entry
                elif block and self.check_token(BlockMappingStartToken):
                    end_mark = self.peek_token().start_mark
                    event = MappingStartEvent(anchor, tag, implicit,
                                              start_mark, end_mark,
                                              flow_style=False)
                    self.state = self.parse_block_mapping_first_key
                elif anchor is not None or tag is not None:
                    # Empty scalars are allowed even if a tag or an anchor is
                    # specified.
                    event = ScalarEvent(anchor, tag, (implicit, False), '',
                                        start_mark, end_mark)
                    self.state = self.states.pop()
                else:
                    node = 'block' if block else 'flow'
                    token = self.peek_token()
                    raise ParserError("while parsing a %s node" % node, start_mark,
                            "expected the node content, but found %r" % token.id,
                            token.start_mark)
        return event

    # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END

    def parse_block_sequence_first_entry(self):
        """Consume the block sequence start token, remember its mark and
        parse the first entry."""
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_block_sequence_entry()

    def parse_block_sequence_entry(self):
        """Parse one block sequence entry or the end of the sequence.

        Raises ParserError if neither a block entry nor a block end follows.
        """
        if self.check_token(BlockEntryToken):
            token = self.get_token()
            if not self.check_token(BlockEntryToken, BlockEndToken):
                self.states.append(self.parse_block_sequence_entry)
                return self.parse_block_node()
            # An entry with no content is an empty scalar.
            self.state = self.parse_block_sequence_entry
            return self.process_empty_scalar(token.end_mark)
        if not self.check_token(BlockEndToken):
            token = self.peek_token()
            raise ParserError("while parsing a block collection", self.marks[-1],
                    "expected <block end>, but found %r" % token.id, token.start_mark)
        token = self.get_token()
        event = SequenceEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    # indentless_sequence ::= (BLOCK-ENTRY block_node?)+

    def parse_indentless_sequence_entry(self):
        """Parse one entry of an indentless sequence, or end the sequence
        (without consuming a token) when no block entry follows."""
        if self.check_token(BlockEntryToken):
            token = self.get_token()
            if not self.check_token(BlockEntryToken,
                                    KeyToken, ValueToken, BlockEndToken):
                self.states.append(self.parse_indentless_sequence_entry)
                return self.parse_block_node()
            self.state = self.parse_indentless_sequence_entry
            return self.process_empty_scalar(token.end_mark)
        token = self.peek_token()
        event = SequenceEndEvent(token.start_mark, token.start_mark)
        self.state = self.states.pop()
        return event

    # block_mapping     ::= BLOCK-MAPPING-START
    #                       ((KEY block_node_or_indentless_sequence?)?
    #                       (VALUE block_node_or_indentless_sequence?)?)*
    #                       BLOCK-END

    def parse_block_mapping_first_key(self):
        """Consume the block mapping start token, remember its mark and
        parse the first key."""
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_block_mapping_key()

    def parse_block_mapping_key(self):
        """Parse a block mapping key or the end of the mapping.

        Raises ParserError if neither a key nor a block end follows.
        """
        if self.check_token(KeyToken):
            token = self.get_token()
            if not self.check_token(KeyToken, ValueToken, BlockEndToken):
                self.states.append(self.parse_block_mapping_value)
                return self.parse_block_node_or_indentless_sequence()
            # A key indicator with no content is an empty scalar key.
            self.state = self.parse_block_mapping_value
            return self.process_empty_scalar(token.end_mark)
        if not self.check_token(BlockEndToken):
            token = self.peek_token()
            raise ParserError("while parsing a block mapping", self.marks[-1],
                    "expected <block end>, but found %r" % token.id, token.start_mark)
        token = self.get_token()
        event = MappingEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_block_mapping_value(self):
        """Parse a block mapping value; a missing value is an empty scalar."""
        if self.check_token(ValueToken):
            token = self.get_token()
            if not self.check_token(KeyToken, ValueToken, BlockEndToken):
                self.states.append(self.parse_block_mapping_key)
                return self.parse_block_node_or_indentless_sequence()
            self.state = self.parse_block_mapping_key
            return self.process_empty_scalar(token.end_mark)
        self.state = self.parse_block_mapping_key
        token = self.peek_token()
        return self.process_empty_scalar(token.start_mark)

    # flow_sequence     ::= FLOW-SEQUENCE-START
    #                       (flow_sequence_entry FLOW-ENTRY)*
    #                       flow_sequence_entry?
    #                       FLOW-SEQUENCE-END
    # flow_sequence_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
    #
    # Note that while the production rules for flow_sequence_entry and
    # flow_mapping_entry are equal, their interpretations are different.
    # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
    # generates an inline (single-pair) mapping.

    def parse_flow_sequence_first_entry(self):
        """Consume the '[' token, remember its mark and parse the first
        entry."""
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_flow_sequence_entry(first=True)

    def parse_flow_sequence_entry(self, first=False):
        """Parse one flow sequence entry or the end of the sequence.

        ``first`` is True only for the entry right after '[', where no
        separating ',' is required.

        Raises ParserError if a later entry is not preceded by ','.
        """
        if not self.check_token(FlowSequenceEndToken):
            if not first:
                if self.check_token(FlowEntryToken):
                    self.get_token()
                else:
                    token = self.peek_token()
                    raise ParserError("while parsing a flow sequence", self.marks[-1],
                            "expected ',' or ']', but got %r" % token.id, token.start_mark)

            if self.check_token(KeyToken):
                # The key token is only peeked here; it is consumed by
                # parse_flow_sequence_entry_mapping_key.
                token = self.peek_token()
                event = MappingStartEvent(None, None, True,
                                          token.start_mark, token.end_mark,
                                          flow_style=True)
                self.state = self.parse_flow_sequence_entry_mapping_key
                return event
            elif not self.check_token(FlowSequenceEndToken):
                self.states.append(self.parse_flow_sequence_entry)
                return self.parse_flow_node()
        token = self.get_token()
        event = SequenceEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_flow_sequence_entry_mapping_key(self):
        """Consume the key token of an inline mapping inside a flow sequence
        and parse the key (an empty scalar if it is missing)."""
        token = self.get_token()
        if not self.check_token(ValueToken,
                                FlowEntryToken, FlowSequenceEndToken):
            self.states.append(self.parse_flow_sequence_entry_mapping_value)
            return self.parse_flow_node()
        self.state = self.parse_flow_sequence_entry_mapping_value
        return self.process_empty_scalar(token.end_mark)

    def parse_flow_sequence_entry_mapping_value(self):
        """Parse the value of an inline mapping inside a flow sequence; a
        missing value is an empty scalar."""
        if self.check_token(ValueToken):
            token = self.get_token()
            if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
                self.states.append(self.parse_flow_sequence_entry_mapping_end)
                return self.parse_flow_node()
            self.state = self.parse_flow_sequence_entry_mapping_end
            return self.process_empty_scalar(token.end_mark)
        self.state = self.parse_flow_sequence_entry_mapping_end
        token = self.peek_token()
        return self.process_empty_scalar(token.start_mark)

    def parse_flow_sequence_entry_mapping_end(self):
        """Emit the zero-width end of an inline mapping and return to the
        enclosing flow sequence."""
        self.state = self.parse_flow_sequence_entry
        token = self.peek_token()
        return MappingEndEvent(token.start_mark, token.start_mark)

    # flow_mapping  ::= FLOW-MAPPING-START
    #                   (flow_mapping_entry FLOW-ENTRY)*
    #                   flow_mapping_entry?
    #                   FLOW-MAPPING-END
    # flow_mapping_entry    ::= flow_node | KEY flow_node? (VALUE flow_node?)?

    def parse_flow_mapping_first_key(self):
        """Consume the '{' token, remember its mark and parse the first
        key."""
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_flow_mapping_key(first=True)

    def parse_flow_mapping_key(self, first=False):
        """Parse one flow mapping key or the end of the mapping.

        ``first`` is True only for the entry right after '{', where no
        separating ',' is required.

        Raises ParserError if a later entry is not preceded by ','.
        """
        if not self.check_token(FlowMappingEndToken):
            if not first:
                if self.check_token(FlowEntryToken):
                    self.get_token()
                else:
                    token = self.peek_token()
                    raise ParserError("while parsing a flow mapping", self.marks[-1],
                            "expected ',' or '}', but got %r" % token.id, token.start_mark)
            if self.check_token(KeyToken):
                token = self.get_token()
                if not self.check_token(ValueToken,
                                        FlowEntryToken, FlowMappingEndToken):
                    self.states.append(self.parse_flow_mapping_value)
                    return self.parse_flow_node()
                self.state = self.parse_flow_mapping_value
                return self.process_empty_scalar(token.end_mark)
            elif not self.check_token(FlowMappingEndToken):
                # A bare node without a key token: it is the key and its
                # value is an empty scalar.
                self.states.append(self.parse_flow_mapping_empty_value)
                return self.parse_flow_node()
        token = self.get_token()
        event = MappingEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_flow_mapping_value(self):
        """Parse a flow mapping value; a missing value is an empty scalar."""
        if self.check_token(ValueToken):
            token = self.get_token()
            if not self.check_token(FlowEntryToken, FlowMappingEndToken):
                self.states.append(self.parse_flow_mapping_key)
                return self.parse_flow_node()
            self.state = self.parse_flow_mapping_key
            return self.process_empty_scalar(token.end_mark)
        self.state = self.parse_flow_mapping_key
        token = self.peek_token()
        return self.process_empty_scalar(token.start_mark)

    def parse_flow_mapping_empty_value(self):
        """Emit the empty scalar value for a flow mapping key that was given
        without a key token."""
        self.state = self.parse_flow_mapping_key
        return self.process_empty_scalar(self.peek_token().start_mark)

    def process_empty_scalar(self, mark):
        """Return a zero-width, untagged, plain-implicit empty ScalarEvent
        located at ``mark``."""
        return ScalarEvent(None, None, (True, False), '', mark, mark)