Decoding side builds.
diff --git a/api_c.go b/api_c.go new file mode 100644 index 0000000..5d7a0f0 --- /dev/null +++ b/api_c.go
@@ -0,0 +1,1183 @@
+package goyaml
+
+import (
+	"io"
+	"os"
+)
+
+///*
+// * Extend a string.
+// */
+//
+//YAML_DECLARE(int)
+//yaml_string_extend(yaml_char_t **start,
+//        yaml_char_t **pointer, yaml_char_t **end)
+//{
+//    yaml_char_t *new_start = yaml_realloc(*start, (*end - *start)*2);
+//
+//    if (!new_start) return 0;
+//
+//    memset(new_start + (*end - *start), 0, *end - *start);
+//
+//    *pointer = new_start + (*pointer - *start);
+//    *end = new_start + (*end - *start)*2;
+//    *start = new_start;
+//
+//    return 1;
+//}
+//
+///*
+// * Append a string B to a string A.
+// */
+//
+//YAML_DECLARE(int)
+//yaml_string_join(
+//        yaml_char_t **a_start, yaml_char_t **a_pointer, yaml_char_t **a_end,
+//        yaml_char_t **b_start, yaml_char_t **b_pointer, yaml_char_t **b_end)
+//{
+//    if (*b_start == *b_pointer)
+//        return 1;
+//
+//    while (*a_end - *a_pointer <= *b_pointer - *b_start) {
+//        if (!yaml_string_extend(a_start, a_pointer, a_end))
+//            return 0;
+//    }
+//
+//    memcpy(*a_pointer, *b_start, *b_pointer - *b_start);
+//    *a_pointer += *b_pointer - *b_start;
+//
+//    return 1;
+//}
+//
+///*
+// * Extend a stack.
+// */
+//
+//YAML_DECLARE(int)
+//yaml_stack_extend(void **start, void **top, void **end)
+//{
+//    void *new_start = yaml_realloc(*start, ((char *)*end - (char *)*start)*2);
+//
+//    if (!new_start) return 0;
+//
+//    *top = (char *)new_start + ((char *)*top - (char *)*start);
+//    *end = (char *)new_start + ((char *)*end - (char *)*start)*2;
+//    *start = new_start;
+//
+//    return 1;
+//}
+
+// yaml_insert_token stores a copy of *token in the parser's token queue.
+//
+// pos is an offset from the head of the queue (parser.tokens_head): the token
+// is placed at that offset and every token at or after it moves one slot
+// towards the tail. A negative pos appends the token at the tail.
+//
+// pos must not be greater than the number of queued tokens; otherwise the
+// slice expressions below panic.
+func yaml_insert_token(parser *yaml_parser_t, pos int, token *yaml_token_t) {
+	// Check if we can move the queue at the beginning of the buffer.
+	if parser.tokens_head > 0 && len(parser.tokens) == cap(parser.tokens) {
+		if parser.tokens_head != len(parser.tokens) {
+			copy(parser.tokens, parser.tokens[parser.tokens_head:])
+		}
+		parser.tokens = parser.tokens[:len(parser.tokens)-parser.tokens_head]
+		parser.tokens_head = 0
+	}
+	// Append the token itself rather than a zero value, so that a negative
+	// pos (plain append) does not drop it.
+	parser.tokens = append(parser.tokens, *token)
+	if pos < 0 {
+		return
+	}
+	// Shift the tail up by one slot and store the token. Both indexes are
+	// relative to the queue head, not to the start of the backing slice.
+	copy(parser.tokens[parser.tokens_head+pos+1:], parser.tokens[parser.tokens_head+pos:])
+	parser.tokens[parser.tokens_head+pos] = *token
+}
+
+// yaml_parser_initialize gives the parser empty buffers, an empty token queue
+// and empty stacks, each allocated with the capacity named in its make call.
+// It always returns true.
+func yaml_parser_initialize(parser *yaml_parser_t) bool {
+	// [Go] These should be initialized lazily, and probably start with smaller sizes.
+	parser.raw_buffer = make([]byte, 0, input_raw_buffer_size)
+	parser.buffer = make([]byte, 0, input_buffer_size)
+	parser.tokens = make([]yaml_token_t, 0, initial_queue_size)
+	parser.indents = make([]int, 0, initial_stack_size)
+	parser.simple_keys = make([]yaml_simple_key_t, 0, initial_stack_size)
+	parser.states = make([]yaml_parser_state_t, 0, initial_stack_size)
+	parser.marks = make([]yaml_mark_t, 0, initial_stack_size)
+	parser.tag_directives = make([]yaml_tag_directive_t, 0, initial_stack_size)
+	return true
+}
+
+// yaml_string_read_handler is the read handler for in-memory input. It copies
+// the unread part of parser.input into buffer, as much as fits, and advances
+// parser.input_pos. It returns io.EOF once all of the input has been consumed.
+func yaml_string_read_handler(parser *yaml_parser_t, buffer []byte) (n int, err error) {
+	if parser.input_pos == len(parser.input) {
+		return 0, io.EOF
+	}
+	n = copy(buffer, parser.input[parser.input_pos:])
+	parser.input_pos += n
+	return n, nil
+}
+
+// yaml_file_read_handler is the read handler for file input; it reads
+// directly from parser.input_file.
+func yaml_file_read_handler(parser *yaml_parser_t, buffer []byte) (n int, err error) {
+	return parser.input_file.Read(buffer)
+}
+
+// yaml_parser_set_input_string makes the parser read from input, starting at
+// its first byte. The slice is stored as is, not copied. It panics if a read
+// handler has already been installed.
+func yaml_parser_set_input_string(parser *yaml_parser_t, input []byte) {
+	if parser.read_handler != nil {
+		panic("must set the input source only once")
+	}
+	parser.read_handler = yaml_string_read_handler
+	parser.input = input
+	parser.input_pos = 0
+}
+
+// Set a file input.
+func yaml_parser_set_input_file(parser *yaml_parser_t, file *os.File) {
+	if parser.read_handler != nil { // an input source has already been chosen
+		panic("must set the input source only once")
+	}
+	parser.input_file = file // read later by yaml_file_read_handler
+	parser.read_handler = yaml_file_read_handler
+}
+
+///*
+// * Set the source encoding.
+// */
+//
+//YAML_DECLARE(void)
+//yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding)
+//{
+//    assert(parser); // Non-NULL parser object expected.
+//    assert(!parser.encoding); // Encoding is already set or detected.
+//
+//    parser.encoding = encoding;
+//}
+//
+///*
+// * Create a new emitter object.
+// */
+//
+//YAML_DECLARE(int)
+//yaml_emitter_initialize(yaml_emitter_t *emitter)
+//{
+//    assert(emitter); // Non-NULL emitter object expected.
+//
+//    memset(emitter, 0, sizeof(yaml_emitter_t));
+//    if (!BUFFER_INIT(emitter, emitter.buffer, OUTPUT_BUFFER_SIZE))
+//        goto error;
+//    if (!BUFFER_INIT(emitter, emitter.raw_buffer, OUTPUT_RAW_BUFFER_SIZE))
+//        goto error;
+//    if (!STACK_INIT(emitter, emitter.states, INITIAL_STACK_SIZE))
+//        goto error;
+//    if (!QUEUE_INIT(emitter, emitter.events, INITIAL_QUEUE_SIZE))
+//        goto error;
+//    if (!STACK_INIT(emitter, emitter.indents, INITIAL_STACK_SIZE))
+//        goto error;
+//    if (!STACK_INIT(emitter, emitter.tag_directives, INITIAL_STACK_SIZE))
+//        goto error;
+//
+//    return 1;
+//
+//error:
+//
+//    BUFFER_DEL(emitter, emitter.buffer);
+//    BUFFER_DEL(emitter, emitter.raw_buffer);
+//    STACK_DEL(emitter, emitter.states);
+//    QUEUE_DEL(emitter, emitter.events);
+//    STACK_DEL(emitter, emitter.indents);
+//    STACK_DEL(emitter, emitter.tag_directives);
+//
+//    return 0;
+//}
+//
+///*
+// * Destroy an emitter object.
+// */
+//
+//YAML_DECLARE(void)
+//yaml_emitter_delete(yaml_emitter_t *emitter)
+//{
+//    assert(emitter); // Non-NULL emitter object expected.
+// +// BUFFER_DEL(emitter, emitter.buffer); +// BUFFER_DEL(emitter, emitter.raw_buffer); +// STACK_DEL(emitter, emitter.states); +// while (!QUEUE_EMPTY(emitter, emitter.events)) { +// yaml_event_delete(&DEQUEUE(emitter, emitter.events)); +// } +// QUEUE_DEL(emitter, emitter.events); +// STACK_DEL(emitter, emitter.indents); +// while (!STACK_EMPTY(empty, emitter.tag_directives)) { +// yaml_tag_directive_t tag_directive = POP(emitter, emitter.tag_directives); +// yaml_free(tag_directive.handle); +// yaml_free(tag_directive.prefix); +// } +// STACK_DEL(emitter, emitter.tag_directives); +// yaml_free(emitter.anchors); +// +// memset(emitter, 0, sizeof(yaml_emitter_t)); +//} +// +///* +// * String write handler. +// */ +// +//static int +//yaml_string_write_handler(void *data, unsigned char *buffer, size_t size) +//{ +// yaml_emitter_t *emitter = data; +// +// if (emitter.output.string.size + *emitter.output.string.size_written +// < size) { +// memcpy(emitter.output.string.buffer +// + *emitter.output.string.size_written, +// buffer, +// emitter.output.string.size +// - *emitter.output.string.size_written); +// *emitter.output.string.size_written = emitter.output.string.size; +// return 0; +// } +// +// memcpy(emitter.output.string.buffer +// + *emitter.output.string.size_written, buffer, size); +// *emitter.output.string.size_written += size; +// return 1; +//} +// +///* +// * File write handler. +// */ +// +//static int +//yaml_file_write_handler(void *data, unsigned char *buffer, size_t size) +//{ +// yaml_emitter_t *emitter = data; +// +// return (fwrite(buffer, 1, size, emitter.output.file) == size); +//} +///* +// * Set a string output. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_output_string(yaml_emitter_t *emitter, +// unsigned char *output, size_t size, size_t *size_written) +//{ +// assert(emitter); // Non-NULL emitter object expected. +// assert(!emitter.write_handler); // You can set the output only once. 
+// assert(output); // Non-NULL output string expected. +// +// emitter.write_handler = yaml_string_write_handler; +// emitter.write_handler_data = emitter; +// +// emitter.output.string.buffer = output; +// emitter.output.string.size = size; +// emitter.output.string.size_written = size_written; +// *size_written = 0; +//} +// +///* +// * Set a file output. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_output_file(yaml_emitter_t *emitter, FILE *file) +//{ +// assert(emitter); // Non-NULL emitter object expected. +// assert(!emitter.write_handler); // You can set the output only once. +// assert(file); // Non-NULL file object expected. +// +// emitter.write_handler = yaml_file_write_handler; +// emitter.write_handler_data = emitter; +// +// emitter.output.file = file; +//} +// +///* +// * Set a generic output handler. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_output(yaml_emitter_t *emitter, +// yaml_write_handler_t *handler, void *data) +//{ +// assert(emitter); // Non-NULL emitter object expected. +// assert(!emitter.write_handler); // You can set the output only once. +// assert(handler); // Non-NULL handler object expected. +// +// emitter.write_handler = handler; +// emitter.write_handler_data = data; +//} +// +///* +// * Set the output encoding. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_encoding(yaml_emitter_t *emitter, yaml_encoding_t encoding) +//{ +// assert(emitter); // Non-NULL emitter object expected. +// assert(!emitter.encoding); // You can set encoding only once. +// +// emitter.encoding = encoding; +//} +// +///* +// * Set the canonical output style. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_canonical(yaml_emitter_t *emitter, int canonical) +//{ +// assert(emitter); // Non-NULL emitter object expected. +// +// emitter.canonical = (canonical != 0); +//} +// +///* +// * Set the indentation increment. 
+// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_indent(yaml_emitter_t *emitter, int indent) +//{ +// assert(emitter); // Non-NULL emitter object expected. +// +// emitter.best_indent = (1 < indent && indent < 10) ? indent : 2; +//} +// +///* +// * Set the preferred line width. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_width(yaml_emitter_t *emitter, int width) +//{ +// assert(emitter); // Non-NULL emitter object expected. +// +// emitter.best_width = (width >= 0) ? width : -1; +//} +// +///* +// * Set if unescaped non-ASCII characters are allowed. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_unicode(yaml_emitter_t *emitter, int unicode) +//{ +// assert(emitter); // Non-NULL emitter object expected. +// +// emitter.unicode = (unicode != 0); +//} +// +///* +// * Set the preferred line break character. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_break(yaml_emitter_t *emitter, yaml_break_t line_break) +//{ +// assert(emitter); // Non-NULL emitter object expected. +// +// emitter.line_break = line_break; +//} +// +///* +// * Destroy a token object. +// */ +// +//YAML_DECLARE(void) +//yaml_token_delete(yaml_token_t *token) +//{ +// assert(token); // Non-NULL token object expected. +// +// switch (token.type) +// { +// case YAML_TAG_DIRECTIVE_TOKEN: +// yaml_free(token.data.tag_directive.handle); +// yaml_free(token.data.tag_directive.prefix); +// break; +// +// case YAML_ALIAS_TOKEN: +// yaml_free(token.data.alias.value); +// break; +// +// case YAML_ANCHOR_TOKEN: +// yaml_free(token.data.anchor.value); +// break; +// +// case YAML_TAG_TOKEN: +// yaml_free(token.data.tag.handle); +// yaml_free(token.data.tag.suffix); +// break; +// +// case YAML_SCALAR_TOKEN: +// yaml_free(token.data.scalar.value); +// break; +// +// default: +// break; +// } +// +// memset(token, 0, sizeof(yaml_token_t)); +//} +// +///* +// * Check if a string is a valid UTF-8 sequence. +// * +// * Check 'reader.c' for more details on UTF-8 encoding. 
+// */ +// +//static int +//yaml_check_utf8(yaml_char_t *start, size_t length) +//{ +// yaml_char_t *end = start+length; +// yaml_char_t *pointer = start; +// +// while (pointer < end) { +// unsigned char octet; +// unsigned int width; +// unsigned int value; +// size_t k; +// +// octet = pointer[0]; +// width = (octet & 0x80) == 0x00 ? 1 : +// (octet & 0xE0) == 0xC0 ? 2 : +// (octet & 0xF0) == 0xE0 ? 3 : +// (octet & 0xF8) == 0xF0 ? 4 : 0; +// value = (octet & 0x80) == 0x00 ? octet & 0x7F : +// (octet & 0xE0) == 0xC0 ? octet & 0x1F : +// (octet & 0xF0) == 0xE0 ? octet & 0x0F : +// (octet & 0xF8) == 0xF0 ? octet & 0x07 : 0; +// if (!width) return 0; +// if (pointer+width > end) return 0; +// for (k = 1; k < width; k ++) { +// octet = pointer[k]; +// if ((octet & 0xC0) != 0x80) return 0; +// value = (value << 6) + (octet & 0x3F); +// } +// if (!((width == 1) || +// (width == 2 && value >= 0x80) || +// (width == 3 && value >= 0x800) || +// (width == 4 && value >= 0x10000))) return 0; +// +// pointer += width; +// } +// +// return 1; +//} +// +///* +// * Create STREAM-START. +// */ +// +//YAML_DECLARE(int) +//yaml_stream_start_event_initialize(yaml_event_t *event, +// yaml_encoding_t encoding) +//{ +// yaml_mark_t mark = { 0, 0, 0 }; +// +// assert(event); // Non-NULL event object is expected. +// +// STREAM_START_EVENT_INIT(*event, encoding, mark, mark); + //*event = yaml_event_t{ + // typ: yaml_STREAM_START_EVENT, + // start_mark: mark, + // end_mark: mark, + //} + //event.stream_start.encoding = encoding +// +// return 1; +//} +// +///* +// * Create STREAM-END. +// */ +// +//YAML_DECLARE(int) +//yaml_stream_end_event_initialize(yaml_event_t *event) +//{ +// yaml_mark_t mark = { 0, 0, 0 }; +// +// assert(event); // Non-NULL event object is expected. 
+// +// STREAM_END_EVENT_INIT(*event, mark, mark); + //*event = yaml_event_t{ + // typ: yaml_STREAM_END_EVENT, + // start_mark: mark, + // end_mark: mark, + //} +// +// return 1; +//} +// +///* +// * Create DOCUMENT-START. +// */ +// +//YAML_DECLARE(int) +//yaml_document_start_event_initialize(yaml_event_t *event, +// yaml_version_directive_t *version_directive, +// yaml_tag_directive_t *tag_directives_start, +// yaml_tag_directive_t *tag_directives_end, +// int implicit) +//{ +// struct { +// yaml_error_type_t error; +// } context; +// yaml_mark_t mark = { 0, 0, 0 }; +// yaml_version_directive_t *version_directive_copy = NULL; +// struct { +// yaml_tag_directive_t *start; +// yaml_tag_directive_t *end; +// yaml_tag_directive_t *top; +// } tag_directives_copy = { NULL, NULL, NULL }; +// yaml_tag_directive_t value = { NULL, NULL }; +// +// assert(event); // Non-NULL event object is expected. +// assert((tag_directives_start && tag_directives_end) || +// (tag_directives_start == tag_directives_end)); +// // Valid tag directives are expected. 
+// +// if (version_directive) { +// version_directive_copy = yaml_malloc(sizeof(yaml_version_directive_t)); +// if (!version_directive_copy) goto error; +// version_directive_copy.major = version_directive.major; +// version_directive_copy.minor = version_directive.minor; +// } +// +// if (tag_directives_start != tag_directives_end) { +// yaml_tag_directive_t *tag_directive; +// if (!STACK_INIT(&context, tag_directives_copy, INITIAL_STACK_SIZE)) +// goto error; +// for (tag_directive = tag_directives_start; +// tag_directive != tag_directives_end; tag_directive ++) { +// assert(tag_directive.handle); +// assert(tag_directive.prefix); +// if (!yaml_check_utf8(tag_directive.handle, +// strlen((char *)tag_directive.handle))) +// goto error; +// if (!yaml_check_utf8(tag_directive.prefix, +// strlen((char *)tag_directive.prefix))) +// goto error; +// value.handle = yaml_strdup(tag_directive.handle); +// value.prefix = yaml_strdup(tag_directive.prefix); +// if (!value.handle || !value.prefix) goto error; +// if (!PUSH(&context, tag_directives_copy, value)) +// goto error; +// value.handle = NULL; +// value.prefix = NULL; +// } +// } +// +// DOCUMENT_START_EVENT_INIT(*event, version_directive_copy, +// tag_directives_copy.start, tag_directives_copy.top, +// implicit, mark, mark); + //*event = yaml_event_t{ + // typ: yaml_DOCUMENT_START_EVENT, + // start_mark: mark, + // end_mark: mark, + //} + //event.document_start.version_directive = version_directive_copy + //event.document_start.tag_directives = tag_directives + //event.document_start.implicit = implicit +// +// return 1; +// +//error: +// yaml_free(version_directive_copy); +// while (!STACK_EMPTY(context, tag_directives_copy)) { +// yaml_tag_directive_t value = POP(context, tag_directives_copy); +// yaml_free(value.handle); +// yaml_free(value.prefix); +// } +// STACK_DEL(context, tag_directives_copy); +// yaml_free(value.handle); +// yaml_free(value.prefix); +// +// return 0; +//} +// +///* +// * Create 
DOCUMENT-END. +// */ +// +//YAML_DECLARE(int) +//yaml_document_end_event_initialize(yaml_event_t *event, int implicit) +//{ +// yaml_mark_t mark = { 0, 0, 0 }; +// +// assert(event); // Non-NULL emitter object is expected. +// +// DOCUMENT_END_EVENT_INIT(*event, implicit, mark, mark); +// +// return 1; +//} +// +///* +// * Create ALIAS. +// */ +// +//YAML_DECLARE(int) +//yaml_alias_event_initialize(yaml_event_t *event, yaml_char_t *anchor) +//{ +// yaml_mark_t mark = { 0, 0, 0 }; +// yaml_char_t *anchor_copy = NULL; +// +// assert(event); // Non-NULL event object is expected. +// assert(anchor); // Non-NULL anchor is expected. +// +// if (!yaml_check_utf8(anchor, strlen((char *)anchor))) return 0; +// +// anchor_copy = yaml_strdup(anchor); +// if (!anchor_copy) +// return 0; +// +// ALIAS_EVENT_INIT(*event, anchor_copy, mark, mark); +// +// return 1; +//} +// +///* +// * Create SCALAR. +// */ +// +//YAML_DECLARE(int) +//yaml_scalar_event_initialize(yaml_event_t *event, +// yaml_char_t *anchor, yaml_char_t *tag, +// yaml_char_t *value, int length, +// int plain_implicit, int quoted_implicit, +// yaml_scalar_style_t style) +//{ +// yaml_mark_t mark = { 0, 0, 0 }; +// yaml_char_t *anchor_copy = NULL; +// yaml_char_t *tag_copy = NULL; +// yaml_char_t *value_copy = NULL; +// +// assert(event); // Non-NULL event object is expected. +// assert(value); // Non-NULL anchor is expected. 
+// +// if (anchor) { +// if (!yaml_check_utf8(anchor, strlen((char *)anchor))) goto error; +// anchor_copy = yaml_strdup(anchor); +// if (!anchor_copy) goto error; +// } +// +// if (tag) { +// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; +// tag_copy = yaml_strdup(tag); +// if (!tag_copy) goto error; +// } +// +// if (length < 0) { +// length = strlen((char *)value); +// } +// +// if (!yaml_check_utf8(value, length)) goto error; +// value_copy = yaml_malloc(length+1); +// if (!value_copy) goto error; +// memcpy(value_copy, value, length); +// value_copy[length] = '\0'; +// +// SCALAR_EVENT_INIT(*event, anchor_copy, tag_copy, value_copy, length, +// plain_implicit, quoted_implicit, style, mark, mark); +// +// return 1; +// +//error: +// yaml_free(anchor_copy); +// yaml_free(tag_copy); +// yaml_free(value_copy); +// +// return 0; +//} +// +///* +// * Create SEQUENCE-START. +// */ +// +//YAML_DECLARE(int) +//yaml_sequence_start_event_initialize(yaml_event_t *event, +// yaml_char_t *anchor, yaml_char_t *tag, int implicit, +// yaml_sequence_style_t style) +//{ +// yaml_mark_t mark = { 0, 0, 0 }; +// yaml_char_t *anchor_copy = NULL; +// yaml_char_t *tag_copy = NULL; +// +// assert(event); // Non-NULL event object is expected. +// +// if (anchor) { +// if (!yaml_check_utf8(anchor, strlen((char *)anchor))) goto error; +// anchor_copy = yaml_strdup(anchor); +// if (!anchor_copy) goto error; +// } +// +// if (tag) { +// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; +// tag_copy = yaml_strdup(tag); +// if (!tag_copy) goto error; +// } +// +// SEQUENCE_START_EVENT_INIT(*event, anchor_copy, tag_copy, +// implicit, style, mark, mark); +// +// return 1; +// +//error: +// yaml_free(anchor_copy); +// yaml_free(tag_copy); +// +// return 0; +//} +// +///* +// * Create SEQUENCE-END. 
+// */ +// +//YAML_DECLARE(int) +//yaml_sequence_end_event_initialize(yaml_event_t *event) +//{ +// yaml_mark_t mark = { 0, 0, 0 }; +// +// assert(event); // Non-NULL event object is expected. +// +// SEQUENCE_END_EVENT_INIT(*event, mark, mark); +// +// return 1; +//} +// +///* +// * Create MAPPING-START. +// */ +// +//YAML_DECLARE(int) +//yaml_mapping_start_event_initialize(yaml_event_t *event, +// yaml_char_t *anchor, yaml_char_t *tag, int implicit, +// yaml_mapping_style_t style) +//{ +// yaml_mark_t mark = { 0, 0, 0 }; +// yaml_char_t *anchor_copy = NULL; +// yaml_char_t *tag_copy = NULL; +// +// assert(event); // Non-NULL event object is expected. +// +// if (anchor) { +// if (!yaml_check_utf8(anchor, strlen((char *)anchor))) goto error; +// anchor_copy = yaml_strdup(anchor); +// if (!anchor_copy) goto error; +// } +// +// if (tag) { +// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; +// tag_copy = yaml_strdup(tag); +// if (!tag_copy) goto error; +// } +// +// MAPPING_START_EVENT_INIT(*event, anchor_copy, tag_copy, +// implicit, style, mark, mark); +// +// return 1; +// +//error: +// yaml_free(anchor_copy); +// yaml_free(tag_copy); +// +// return 0; +//} +// +///* +// * Create MAPPING-END. +// */ +// +//YAML_DECLARE(int) +//yaml_mapping_end_event_initialize(yaml_event_t *event) +//{ +// yaml_mark_t mark = { 0, 0, 0 }; +// +// assert(event); // Non-NULL event object is expected. +// +// MAPPING_END_EVENT_INIT(*event, mark, mark); +// +// return 1; +//} + +///* +// * Create a document object. 
+// */ +// +//YAML_DECLARE(int) +//yaml_document_initialize(yaml_document_t *document, +// yaml_version_directive_t *version_directive, +// yaml_tag_directive_t *tag_directives_start, +// yaml_tag_directive_t *tag_directives_end, +// int start_implicit, int end_implicit) +//{ +// struct { +// yaml_error_type_t error; +// } context; +// struct { +// yaml_node_t *start; +// yaml_node_t *end; +// yaml_node_t *top; +// } nodes = { NULL, NULL, NULL }; +// yaml_version_directive_t *version_directive_copy = NULL; +// struct { +// yaml_tag_directive_t *start; +// yaml_tag_directive_t *end; +// yaml_tag_directive_t *top; +// } tag_directives_copy = { NULL, NULL, NULL }; +// yaml_tag_directive_t value = { NULL, NULL }; +// yaml_mark_t mark = { 0, 0, 0 }; +// +// assert(document); // Non-NULL document object is expected. +// assert((tag_directives_start && tag_directives_end) || +// (tag_directives_start == tag_directives_end)); +// // Valid tag directives are expected. +// +// if (!STACK_INIT(&context, nodes, INITIAL_STACK_SIZE)) goto error; +// +// if (version_directive) { +// version_directive_copy = yaml_malloc(sizeof(yaml_version_directive_t)); +// if (!version_directive_copy) goto error; +// version_directive_copy.major = version_directive.major; +// version_directive_copy.minor = version_directive.minor; +// } +// +// if (tag_directives_start != tag_directives_end) { +// yaml_tag_directive_t *tag_directive; +// if (!STACK_INIT(&context, tag_directives_copy, INITIAL_STACK_SIZE)) +// goto error; +// for (tag_directive = tag_directives_start; +// tag_directive != tag_directives_end; tag_directive ++) { +// assert(tag_directive.handle); +// assert(tag_directive.prefix); +// if (!yaml_check_utf8(tag_directive.handle, +// strlen((char *)tag_directive.handle))) +// goto error; +// if (!yaml_check_utf8(tag_directive.prefix, +// strlen((char *)tag_directive.prefix))) +// goto error; +// value.handle = yaml_strdup(tag_directive.handle); +// value.prefix = 
yaml_strdup(tag_directive.prefix); +// if (!value.handle || !value.prefix) goto error; +// if (!PUSH(&context, tag_directives_copy, value)) +// goto error; +// value.handle = NULL; +// value.prefix = NULL; +// } +// } +// +// DOCUMENT_INIT(*document, nodes.start, nodes.end, version_directive_copy, +// tag_directives_copy.start, tag_directives_copy.top, +// start_implicit, end_implicit, mark, mark); +// +// return 1; +// +//error: +// STACK_DEL(&context, nodes); +// yaml_free(version_directive_copy); +// while (!STACK_EMPTY(&context, tag_directives_copy)) { +// yaml_tag_directive_t value = POP(&context, tag_directives_copy); +// yaml_free(value.handle); +// yaml_free(value.prefix); +// } +// STACK_DEL(&context, tag_directives_copy); +// yaml_free(value.handle); +// yaml_free(value.prefix); +// +// return 0; +//} +// +///* +// * Destroy a document object. +// */ +// +//YAML_DECLARE(void) +//yaml_document_delete(yaml_document_t *document) +//{ +// struct { +// yaml_error_type_t error; +// } context; +// yaml_tag_directive_t *tag_directive; +// +// context.error = YAML_NO_ERROR; // Eliminate a compliler warning. +// +// assert(document); // Non-NULL document object is expected. +// +// while (!STACK_EMPTY(&context, document.nodes)) { +// yaml_node_t node = POP(&context, document.nodes); +// yaml_free(node.tag); +// switch (node.type) { +// case YAML_SCALAR_NODE: +// yaml_free(node.data.scalar.value); +// break; +// case YAML_SEQUENCE_NODE: +// STACK_DEL(&context, node.data.sequence.items); +// break; +// case YAML_MAPPING_NODE: +// STACK_DEL(&context, node.data.mapping.pairs); +// break; +// default: +// assert(0); // Should not happen. 
+// } +// } +// STACK_DEL(&context, document.nodes); +// +// yaml_free(document.version_directive); +// for (tag_directive = document.tag_directives.start; +// tag_directive != document.tag_directives.end; +// tag_directive++) { +// yaml_free(tag_directive.handle); +// yaml_free(tag_directive.prefix); +// } +// yaml_free(document.tag_directives.start); +// +// memset(document, 0, sizeof(yaml_document_t)); +//} +// +///** +// * Get a document node. +// */ +// +//YAML_DECLARE(yaml_node_t *) +//yaml_document_get_node(yaml_document_t *document, int index) +//{ +// assert(document); // Non-NULL document object is expected. +// +// if (index > 0 && document.nodes.start + index <= document.nodes.top) { +// return document.nodes.start + index - 1; +// } +// return NULL; +//} +// +///** +// * Get the root object. +// */ +// +//YAML_DECLARE(yaml_node_t *) +//yaml_document_get_root_node(yaml_document_t *document) +//{ +// assert(document); // Non-NULL document object is expected. +// +// if (document.nodes.top != document.nodes.start) { +// return document.nodes.start; +// } +// return NULL; +//} +// +///* +// * Add a scalar node to a document. +// */ +// +//YAML_DECLARE(int) +//yaml_document_add_scalar(yaml_document_t *document, +// yaml_char_t *tag, yaml_char_t *value, int length, +// yaml_scalar_style_t style) +//{ +// struct { +// yaml_error_type_t error; +// } context; +// yaml_mark_t mark = { 0, 0, 0 }; +// yaml_char_t *tag_copy = NULL; +// yaml_char_t *value_copy = NULL; +// yaml_node_t node; +// +// assert(document); // Non-NULL document object is expected. +// assert(value); // Non-NULL value is expected. 
+// +// if (!tag) { +// tag = (yaml_char_t *)YAML_DEFAULT_SCALAR_TAG; +// } +// +// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; +// tag_copy = yaml_strdup(tag); +// if (!tag_copy) goto error; +// +// if (length < 0) { +// length = strlen((char *)value); +// } +// +// if (!yaml_check_utf8(value, length)) goto error; +// value_copy = yaml_malloc(length+1); +// if (!value_copy) goto error; +// memcpy(value_copy, value, length); +// value_copy[length] = '\0'; +// +// SCALAR_NODE_INIT(node, tag_copy, value_copy, length, style, mark, mark); +// if (!PUSH(&context, document.nodes, node)) goto error; +// +// return document.nodes.top - document.nodes.start; +// +//error: +// yaml_free(tag_copy); +// yaml_free(value_copy); +// +// return 0; +//} +// +///* +// * Add a sequence node to a document. +// */ +// +//YAML_DECLARE(int) +//yaml_document_add_sequence(yaml_document_t *document, +// yaml_char_t *tag, yaml_sequence_style_t style) +//{ +// struct { +// yaml_error_type_t error; +// } context; +// yaml_mark_t mark = { 0, 0, 0 }; +// yaml_char_t *tag_copy = NULL; +// struct { +// yaml_node_item_t *start; +// yaml_node_item_t *end; +// yaml_node_item_t *top; +// } items = { NULL, NULL, NULL }; +// yaml_node_t node; +// +// assert(document); // Non-NULL document object is expected. +// +// if (!tag) { +// tag = (yaml_char_t *)YAML_DEFAULT_SEQUENCE_TAG; +// } +// +// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; +// tag_copy = yaml_strdup(tag); +// if (!tag_copy) goto error; +// +// if (!STACK_INIT(&context, items, INITIAL_STACK_SIZE)) goto error; +// +// SEQUENCE_NODE_INIT(node, tag_copy, items.start, items.end, +// style, mark, mark); +// if (!PUSH(&context, document.nodes, node)) goto error; +// +// return document.nodes.top - document.nodes.start; +// +//error: +// STACK_DEL(&context, items); +// yaml_free(tag_copy); +// +// return 0; +//} +// +///* +// * Add a mapping node to a document. 
+// */ +// +//YAML_DECLARE(int) +//yaml_document_add_mapping(yaml_document_t *document, +// yaml_char_t *tag, yaml_mapping_style_t style) +//{ +// struct { +// yaml_error_type_t error; +// } context; +// yaml_mark_t mark = { 0, 0, 0 }; +// yaml_char_t *tag_copy = NULL; +// struct { +// yaml_node_pair_t *start; +// yaml_node_pair_t *end; +// yaml_node_pair_t *top; +// } pairs = { NULL, NULL, NULL }; +// yaml_node_t node; +// +// assert(document); // Non-NULL document object is expected. +// +// if (!tag) { +// tag = (yaml_char_t *)YAML_DEFAULT_MAPPING_TAG; +// } +// +// if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; +// tag_copy = yaml_strdup(tag); +// if (!tag_copy) goto error; +// +// if (!STACK_INIT(&context, pairs, INITIAL_STACK_SIZE)) goto error; +// +// MAPPING_NODE_INIT(node, tag_copy, pairs.start, pairs.end, +// style, mark, mark); +// if (!PUSH(&context, document.nodes, node)) goto error; +// +// return document.nodes.top - document.nodes.start; +// +//error: +// STACK_DEL(&context, pairs); +// yaml_free(tag_copy); +// +// return 0; +//} +// +///* +// * Append an item to a sequence node. +// */ +// +//YAML_DECLARE(int) +//yaml_document_append_sequence_item(yaml_document_t *document, +// int sequence, int item) +//{ +// struct { +// yaml_error_type_t error; +// } context; +// +// assert(document); // Non-NULL document is required. +// assert(sequence > 0 +// && document.nodes.start + sequence <= document.nodes.top); +// // Valid sequence id is required. +// assert(document.nodes.start[sequence-1].type == YAML_SEQUENCE_NODE); +// // A sequence node is required. +// assert(item > 0 && document.nodes.start + item <= document.nodes.top); +// // Valid item id is required. +// +// if (!PUSH(&context, +// document.nodes.start[sequence-1].data.sequence.items, item)) +// return 0; +// +// return 1; +//} +// +///* +// * Append a pair of a key and a value to a mapping node. 
+// */ +// +//YAML_DECLARE(int) +//yaml_document_append_mapping_pair(yaml_document_t *document, +// int mapping, int key, int value) +//{ +// struct { +// yaml_error_type_t error; +// } context; +// +// yaml_node_pair_t pair; +// +// assert(document); // Non-NULL document is required. +// assert(mapping > 0 +// && document.nodes.start + mapping <= document.nodes.top); +// // Valid mapping id is required. +// assert(document.nodes.start[mapping-1].type == YAML_MAPPING_NODE); +// // A mapping node is required. +// assert(key > 0 && document.nodes.start + key <= document.nodes.top); +// // Valid key id is required. +// assert(value > 0 && document.nodes.start + value <= document.nodes.top); +// // Valid value id is required. +// +// pair.key = key; +// pair.value = value; +// +// if (!PUSH(&context, +// document.nodes.start[mapping-1].data.mapping.pairs, pair)) +// return 0; +// +// return 1; +//} +// +//
diff --git a/parser_c.go b/parser_c.go new file mode 100644 index 0000000..c965594 --- /dev/null +++ b/parser_c.go
@@ -0,0 +1,1096 @@ +package goyaml + +import ( + "bytes" +) + +// The parser implements the following grammar: +// +// stream ::= STREAM-START implicit_document? explicit_document* STREAM-END +// implicit_document ::= block_node DOCUMENT-END* +// explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +// block_node_or_indentless_sequence ::= +// ALIAS +// | properties (block_content | indentless_block_sequence)? +// | block_content +// | indentless_block_sequence +// block_node ::= ALIAS +// | properties block_content? +// | block_content +// flow_node ::= ALIAS +// | properties flow_content? +// | flow_content +// properties ::= TAG ANCHOR? | ANCHOR TAG? +// block_content ::= block_collection | flow_collection | SCALAR +// flow_content ::= flow_collection | SCALAR +// block_collection ::= block_sequence | block_mapping +// flow_collection ::= flow_sequence | flow_mapping +// block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +// indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +// block_mapping ::= BLOCK-MAPPING_START +// ((KEY block_node_or_indentless_sequence?)? +// (VALUE block_node_or_indentless_sequence?)?)* +// BLOCK-END +// flow_sequence ::= FLOW-SEQUENCE-START +// (flow_sequence_entry FLOW-ENTRY)* +// flow_sequence_entry? +// FLOW-SEQUENCE-END +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// flow_mapping ::= FLOW-MAPPING-START +// (flow_mapping_entry FLOW-ENTRY)* +// flow_mapping_entry? +// FLOW-MAPPING-END +// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + +// Peek the next token in the token queue. +func peek_token(parser *yaml_parser_t) *yaml_token_t { + if parser.token_available || yaml_parser_fetch_more_tokens(parser) { + return &parser.tokens[parser.tokens_head] + } + return nil +} + +// Remove the next token from the queue (must be called after peek_token). 
+func skip_token(parser *yaml_parser_t) { + parser.token_available = false + parser.tokens_parsed++ + parser.stream_end_produced = parser.tokens[parser.tokens_head].typ == yaml_STREAM_END_TOKEN + parser.tokens_head++ +} + +// Get the next event. +func yaml_parser_parse(parser *yaml_parser_t, event *yaml_event_t) bool { + // Erase the event object. + *event = yaml_event_t{} + + // No events after the end of the stream or error. + if parser.stream_end_produced || parser.error != yaml_NO_ERROR || parser.state == yaml_PARSE_END_STATE { + return true + } + + // Generate the next event. + return yaml_parser_state_machine(parser, event) +} + +// Set parser error. +func yaml_parser_set_parser_error(parser *yaml_parser_t, problem string, problem_mark yaml_mark_t) bool { + parser.error = yaml_PARSER_ERROR + parser.problem = problem + parser.problem_mark = problem_mark + return false +} + +func yaml_parser_set_parser_error_context(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string, problem_mark yaml_mark_t) bool { + parser.error = yaml_PARSER_ERROR + parser.context = context + parser.context_mark = context_mark + parser.problem = problem + parser.problem_mark = problem_mark + return false +} + +// State dispatcher. 
// yaml_parser_state_machine calls the handler that corresponds to the
// current parser.state and returns its result. An unknown state is a
// programming error and panics.
func yaml_parser_state_machine(parser *yaml_parser_t, event *yaml_event_t) bool {
	switch parser.state {
	case yaml_PARSE_STREAM_START_STATE:
		return yaml_parser_parse_stream_start(parser, event)

	case yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE:
		return yaml_parser_parse_document_start(parser, event, true)

	case yaml_PARSE_DOCUMENT_START_STATE:
		return yaml_parser_parse_document_start(parser, event, false)

	case yaml_PARSE_DOCUMENT_CONTENT_STATE:
		return yaml_parser_parse_document_content(parser, event)

	case yaml_PARSE_DOCUMENT_END_STATE:
		return yaml_parser_parse_document_end(parser, event)

	case yaml_PARSE_BLOCK_NODE_STATE:
		return yaml_parser_parse_node(parser, event, true, false)

	case yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE:
		return yaml_parser_parse_node(parser, event, true, true)

	case yaml_PARSE_FLOW_NODE_STATE:
		return yaml_parser_parse_node(parser, event, false, false)

	case yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE:
		return yaml_parser_parse_block_sequence_entry(parser, event, true)

	case yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE:
		return yaml_parser_parse_block_sequence_entry(parser, event, false)

	case yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE:
		return yaml_parser_parse_indentless_sequence_entry(parser, event)

	case yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE:
		return yaml_parser_parse_block_mapping_key(parser, event, true)

	case yaml_PARSE_BLOCK_MAPPING_KEY_STATE:
		return yaml_parser_parse_block_mapping_key(parser, event, false)

	case yaml_PARSE_BLOCK_MAPPING_VALUE_STATE:
		return yaml_parser_parse_block_mapping_value(parser, event)

	case yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE:
		return yaml_parser_parse_flow_sequence_entry(parser, event, true)

	case yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE:
		return yaml_parser_parse_flow_sequence_entry(parser, event, false)

	case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE:
		return yaml_parser_parse_flow_sequence_entry_mapping_key(parser, event)

	case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE:
		return yaml_parser_parse_flow_sequence_entry_mapping_value(parser, event)

	case yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE:
		return yaml_parser_parse_flow_sequence_entry_mapping_end(parser, event)

	case yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE:
		return yaml_parser_parse_flow_mapping_key(parser, event, true)

	case yaml_PARSE_FLOW_MAPPING_KEY_STATE:
		return yaml_parser_parse_flow_mapping_key(parser, event, false)

	case yaml_PARSE_FLOW_MAPPING_VALUE_STATE:
		return yaml_parser_parse_flow_mapping_value(parser, event, false)

	case yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE:
		return yaml_parser_parse_flow_mapping_value(parser, event, true)

	default:
		panic("invalid parser state")
	}
	// Never reached: every case above returns or panics.
	return false
}

// Parse the production:
// stream   ::= STREAM-START implicit_document? explicit_document* STREAM-END
//              ************
//
// Expects a STREAM-START token at the head of the queue, emits a
// STREAM-START event carrying the token's encoding, consumes the token and
// moves to the implicit-document-start state. Returns false if no token
// could be fetched or the token has a different type (a parser error is
// recorded in the latter case).
func yaml_parser_parse_stream_start(parser *yaml_parser_t, event *yaml_event_t) bool {
	token := peek_token(parser)
	if token == nil {
		return false
	}
	if token.typ != yaml_STREAM_START_TOKEN {
		return yaml_parser_set_parser_error(parser, "did not find expected <stream-start>", token.start_mark)
	}
	parser.state = yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE
	*event = yaml_event_t{
		typ:        yaml_STREAM_START_EVENT,
		start_mark: token.start_mark,
		end_mark:   token.end_mark,
	}
	event.stream_start.encoding = token.stream_start.encoding
	skip_token(parser)
	return true
}

// Parse the productions:
// implicit_document    ::= block_node DOCUMENT-END*
//                          *
// explicit_document    ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
//                          *************************
//
// Depending on the next token this emits either a DOCUMENT-START event (for
// an implicit or an explicit document) or, at the end of input, a STREAM-END
// event. `implicit` is true only for the first document of the stream (see
// the state dispatcher) and allows a document to begin without directives or
// a DOCUMENT-START token.
func yaml_parser_parse_document_start(parser *yaml_parser_t, event *yaml_event_t, implicit bool) bool {
	token := peek_token(parser)
	if token == nil {
		return false
	}

	// Parse extra document end indicators.
	if !implicit {
		for token.typ == yaml_DOCUMENT_END_TOKEN {
			skip_token(parser)
			token = peek_token(parser)
			if token == nil {
				return false
			}
		}
	}

	// Parse an implicit document: allowed only when the next token is not a
	// directive, a document start or the stream end.
	if implicit && token.typ != yaml_VERSION_DIRECTIVE_TOKEN &&
		token.typ != yaml_TAG_DIRECTIVE_TOKEN &&
		token.typ != yaml_DOCUMENT_START_TOKEN &&
		token.typ != yaml_STREAM_END_TOKEN {
		// No directive tokens are present here, so this call only installs
		// the default tag directives.
		if !yaml_parser_process_directives(parser, nil, nil) {
			return false
		}
		// After the root node is parsed, continue with the document end.
		parser.states = append(parser.states, yaml_PARSE_DOCUMENT_END_STATE)
		parser.state = yaml_PARSE_BLOCK_NODE_STATE

		// The first content token is not consumed; it belongs to the node.
		// NOTE(review): event.document_start.implicit is left at its zero
		// value here although the document is implicit — confirm that no
		// consumer relies on the flag.
		*event = yaml_event_t{
			typ:        yaml_DOCUMENT_START_EVENT,
			start_mark: token.start_mark,
			end_mark:   token.end_mark,
		}

	} else if token.typ != yaml_STREAM_END_TOKEN {
		// Parse an explicit document.
		var version_directive *yaml_version_directive_t
		var tag_directives []yaml_tag_directive_t
		// The event starts at the first directive (or at the document start
		// token when there are no directives).
		start_mark := token.start_mark
		if !yaml_parser_process_directives(parser, &version_directive, &tag_directives) {
			return false
		}
		// Directives have been consumed; re-peek to find DOCUMENT-START.
		token = peek_token(parser)
		if token == nil {
			return false
		}
		if token.typ != yaml_DOCUMENT_START_TOKEN {
			yaml_parser_set_parser_error(parser,
				"did not find expected <document start>", token.start_mark)
			return false
		}
		parser.states = append(parser.states, yaml_PARSE_DOCUMENT_END_STATE)
		parser.state = yaml_PARSE_DOCUMENT_CONTENT_STATE
		end_mark := token.end_mark

		*event = yaml_event_t{
			typ:        yaml_DOCUMENT_START_EVENT,
			start_mark: start_mark,
			end_mark:   end_mark,
		}
		event.document_start.version_directive = version_directive
		event.document_start.tag_directives = tag_directives
		event.document_start.implicit = false
		skip_token(parser)

	} else {
		// Parse the stream end.
		parser.state = yaml_PARSE_END_STATE
		*event = yaml_event_t{
			typ:        yaml_STREAM_END_EVENT,
			start_mark: token.start_mark,
			end_mark:   token.end_mark,
		}
		skip_token(parser)
	}

	return true
}

// Parse the productions:
// explicit_document    ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
//                                                    ***********
//
// If the explicit document has no content (the next token is a directive, a
// document start/end or the stream end), the saved state is popped and an
// empty scalar event is produced in place of the root node; otherwise the
// root block node is parsed.
func yaml_parser_parse_document_content(parser *yaml_parser_t, event *yaml_event_t) bool {
	token := peek_token(parser)
	if token == nil {
		return false
	}
	if token.typ == yaml_VERSION_DIRECTIVE_TOKEN ||
		token.typ == yaml_TAG_DIRECTIVE_TOKEN ||
		token.typ == yaml_DOCUMENT_START_TOKEN ||
		token.typ == yaml_DOCUMENT_END_TOKEN ||
		token.typ == yaml_STREAM_END_TOKEN {
		// Pop the state pushed by yaml_parser_parse_document_start.
		parser.state = parser.states[len(parser.states)-1]
		parser.states = parser.states[:len(parser.states)-1]
		return yaml_parser_process_empty_scalar(parser, event,
			token.start_mark)
	} else {
		return yaml_parser_parse_node(parser, event, true, false)
	}
}

// Parse the productions:
// implicit_document    ::= block_node DOCUMENT-END*
//                                     *************
// explicit_document    ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
//
// Emits a DOCUMENT-END event. The end is explicit only when a DOCUMENT-END
// token is present (it is then consumed); otherwise the event is zero-width
// at the start of the next token. The tag directives collected for the
// document are discarded and the parser moves to the document-start state.
func yaml_parser_parse_document_end(parser *yaml_parser_t, event *yaml_event_t) bool {
	token := peek_token(parser)
	if token == nil {
		return false
	}

	start_mark := token.start_mark
	end_mark := token.start_mark

	implicit := true
	if token.typ == yaml_DOCUMENT_END_TOKEN {
		end_mark = token.end_mark
		skip_token(parser)
		implicit = false
	}

	// Tag directives are scoped to a single document; keep the backing array.
	parser.tag_directives = parser.tag_directives[:0]

	parser.state = yaml_PARSE_DOCUMENT_START_STATE
	*event = yaml_event_t{
		typ:        yaml_DOCUMENT_END_EVENT,
		start_mark: start_mark,
		end_mark:   end_mark,
	}
	// NOTE(review): this is a DOCUMENT-END event but the flag is stored in
	// the document_start member — confirm whether a document_end member
	// exists on yaml_event_t and should be used instead.
	event.document_start.implicit = implicit
	return true
}

// Parse the productions:
// block_node_or_indentless_sequence    ::=
//                          ALIAS
//                          *****
//                          | properties (block_content | indentless_block_sequence)?
+// ********** * +// | block_content | indentless_block_sequence +// * +// block_node ::= ALIAS +// ***** +// | properties block_content? +// ********** * +// | block_content +// * +// flow_node ::= ALIAS +// ***** +// | properties flow_content? +// ********** * +// | flow_content +// * +// properties ::= TAG ANCHOR? | ANCHOR TAG? +// ************************* +// block_content ::= block_collection | flow_collection | SCALAR +// ****** +// flow_content ::= flow_collection | SCALAR +// ****** +func yaml_parser_parse_node(parser *yaml_parser_t, event *yaml_event_t, block, indentless_sequence bool) bool { + token := peek_token(parser) + if token == nil { + return false + } + if token.typ == yaml_ALIAS_TOKEN { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + *event = yaml_event_t{ + typ: yaml_ALIAS_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + event.alias.anchor = token.alias.value + skip_token(parser) + return true + } + + start_mark := token.start_mark + end_mark := token.start_mark + + var tag_token bool + var tag_handle, tag_suffix, anchor []byte + var tag_mark yaml_mark_t + if token.typ == yaml_ANCHOR_TOKEN { + anchor = token.anchor.value + start_mark = token.start_mark + end_mark = token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ == yaml_TAG_TOKEN { + tag_token = true + tag_handle = token.tag.handle + tag_suffix = token.tag.suffix + tag_mark = token.start_mark + end_mark = token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } + } else if token.typ == yaml_TAG_TOKEN { + tag_token = true + tag_handle = token.tag.handle + tag_suffix = token.tag.suffix + start_mark = token.start_mark + tag_mark = token.start_mark + end_mark = token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ == 
yaml_ANCHOR_TOKEN { + anchor = token.anchor.value + end_mark = token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } + } + + var tag []byte + if tag_token { + if len(tag_handle) == 0 { + tag = tag_suffix + tag_suffix = nil + } else { + for i := range parser.tag_directives { + if bytes.Equal(parser.tag_directives[i].handle, tag_handle) { + tag := append([]byte(nil), parser.tag_directives[i].prefix...) + tag = append(tag, tag_suffix...) + break + } + } + if len(tag) == 0 { + yaml_parser_set_parser_error_context(parser, + "while parsing a node", start_mark, + "found undefined tag handle", tag_mark) + return false + } + } + } + + implicit := len(tag) == 0 + if indentless_sequence && token.typ == yaml_BLOCK_ENTRY_TOKEN { + end_mark = token.end_mark + parser.state = yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE + *event = yaml_event_t{ + typ: yaml_SEQUENCE_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + } + event.sequence_start.anchor = anchor + event.sequence_start.tag = tag + event.sequence_start.implicit = implicit + event.sequence_start.style = yaml_BLOCK_SEQUENCE_STYLE + return true + } + if token.typ == yaml_SCALAR_TOKEN { + var plain_implicit, quoted_implicit bool + end_mark = token.end_mark + if (len(tag) == 0 && token.scalar.style == yaml_PLAIN_SCALAR_STYLE) || (len(tag) == 1 && tag[0] == '!') { + plain_implicit = true + } else if len(tag) == 0 { + quoted_implicit = true + } + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + + *event = yaml_event_t{ + typ: yaml_SCALAR_EVENT, + start_mark: start_mark, + end_mark: end_mark, + } + event.scalar.anchor = anchor + event.scalar.tag = tag + event.scalar.value = token.scalar.value + event.scalar.plain_implicit = plain_implicit + event.scalar.quoted_implicit = quoted_implicit + event.scalar.style = token.scalar.style + skip_token(parser) + return true + } + if token.typ == 
yaml_FLOW_SEQUENCE_START_TOKEN { + // [Go] Some of the events below can be merged as they differ only on style. + end_mark = token.end_mark + parser.state = yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE + *event = yaml_event_t{ + typ: yaml_SEQUENCE_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + } + event.sequence_start.anchor = anchor + event.sequence_start.tag = tag + event.sequence_start.implicit = implicit + event.sequence_start.style = yaml_FLOW_SEQUENCE_STYLE + return true + } + if token.typ == yaml_FLOW_MAPPING_START_TOKEN { + end_mark = token.end_mark + parser.state = yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE + *event = yaml_event_t{ + typ: yaml_MAPPING_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + } + event.mapping_start.anchor = anchor + event.mapping_start.tag = tag + event.mapping_start.implicit = implicit + event.mapping_start.style = yaml_FLOW_MAPPING_STYLE + return true + } + if block && token.typ == yaml_BLOCK_SEQUENCE_START_TOKEN { + end_mark = token.end_mark + parser.state = yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE + *event = yaml_event_t{ + typ: yaml_SEQUENCE_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + } + event.sequence_start.anchor = anchor + event.sequence_start.tag = tag + event.sequence_start.implicit = implicit + event.sequence_start.style = yaml_BLOCK_SEQUENCE_STYLE + return true + } + if block && token.typ == yaml_BLOCK_MAPPING_START_TOKEN { + end_mark = token.end_mark + parser.state = yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE + *event = yaml_event_t{ + typ: yaml_MAPPING_START_EVENT, + start_mark: start_mark, + end_mark: end_mark, + } + event.mapping_start.anchor = anchor + event.mapping_start.tag = tag + event.mapping_start.implicit = implicit + event.mapping_start.style = yaml_BLOCK_MAPPING_STYLE + return true + } + if len(anchor) > 0 || len(tag) > 0 { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + + *event = yaml_event_t{ + 
typ: yaml_SCALAR_EVENT, + start_mark: start_mark, + end_mark: end_mark, + } + event.scalar.anchor = anchor + event.scalar.tag = tag + event.scalar.plain_implicit = implicit + event.scalar.quoted_implicit = false + event.scalar.style = yaml_PLAIN_SCALAR_STYLE + return true + } + + context := "while parsing a flow node" + if block { + context = "while parsing a block node" + } + yaml_parser_set_parser_error_context(parser, context, start_mark, + "did not find expected node content", token.start_mark) + return false +} + +// Parse the productions: +// block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +// ******************** *********** * ********* +// +func yaml_parser_parse_block_sequence_entry(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { + if first { + token := peek_token(parser) + parser.marks = append(parser.marks, token.start_mark) + skip_token(parser) + } + + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ == yaml_BLOCK_ENTRY_TOKEN { + mark := token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_BLOCK_ENTRY_TOKEN && token.typ != yaml_BLOCK_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE) + return yaml_parser_parse_node(parser, event, true, false) + } else { + parser.state = yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) + } + } + if token.typ == yaml_BLOCK_END_TOKEN { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + + *event = yaml_event_t{ + typ: yaml_SEQUENCE_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + + skip_token(parser) + return true + } + + context_mark := parser.marks[len(parser.marks)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + return 
yaml_parser_set_parser_error_context(parser, + "while parsing a block collection", context_mark, + "did not find expected '-' indicator", token.start_mark) +} + +// Parse the productions: +// indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +// *********** * +func yaml_parser_parse_indentless_sequence_entry(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ == yaml_BLOCK_ENTRY_TOKEN { + mark := token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_BLOCK_ENTRY_TOKEN && + token.typ != yaml_KEY_TOKEN && + token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_BLOCK_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE) + return yaml_parser_parse_node(parser, event, true, false) + } else { + parser.state = yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) + } + } else { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + + *event = yaml_event_t{ + typ: yaml_SEQUENCE_END_EVENT, + start_mark: token.start_mark, + end_mark: token.start_mark, // [Go] Shouldn't this be token.end_mark? + } + return true + } +} + +// Parse the productions: +// block_mapping ::= BLOCK-MAPPING_START +// ******************* +// ((KEY block_node_or_indentless_sequence?)? 
+// *** * +// (VALUE block_node_or_indentless_sequence?)?)* +// +// BLOCK-END +// ********* +// +func yaml_parser_parse_block_mapping_key(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { + if first { + token := peek_token(parser) + parser.marks = append(parser.marks, token.start_mark) + skip_token(parser) + } + + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ == yaml_KEY_TOKEN { + mark := token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_KEY_TOKEN && + token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_BLOCK_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_BLOCK_MAPPING_VALUE_STATE) + return yaml_parser_parse_node(parser, event, true, true) + } else { + parser.state = yaml_PARSE_BLOCK_MAPPING_VALUE_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) + } + } else if token.typ == yaml_BLOCK_END_TOKEN { + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + *event = yaml_event_t{ + typ: yaml_MAPPING_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + skip_token(parser) + return true + } + + context_mark := parser.marks[len(parser.marks)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + return yaml_parser_set_parser_error_context(parser, + "while parsing a block mapping", context_mark, + "did not find expected key", token.start_mark) +} + +// Parse the productions: +// block_mapping ::= BLOCK-MAPPING_START +// +// ((KEY block_node_or_indentless_sequence?)? 
+// +// (VALUE block_node_or_indentless_sequence?)?)* +// ***** * +// BLOCK-END +// +// +func yaml_parser_parse_block_mapping_value(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + if token.typ == yaml_VALUE_TOKEN { + mark := token.end_mark + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_KEY_TOKEN && + token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_BLOCK_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_BLOCK_MAPPING_KEY_STATE) + return yaml_parser_parse_node(parser, event, true, true) + } else { + parser.state = yaml_PARSE_BLOCK_MAPPING_KEY_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) + } + } else { + parser.state = yaml_PARSE_BLOCK_MAPPING_KEY_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) + } +} + +// Parse the productions: +// flow_sequence ::= FLOW-SEQUENCE-START +// ******************* +// (flow_sequence_entry FLOW-ENTRY)* +// * ********** +// flow_sequence_entry? +// * +// FLOW-SEQUENCE-END +// ***************** +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 
+// * +// +func yaml_parser_parse_flow_sequence_entry(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { + if first { + token := peek_token(parser) + parser.marks = append(parser.marks, token.start_mark) + skip_token(parser) + } + token := peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { + if !first { + if token.typ == yaml_FLOW_ENTRY_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } else { + context_mark := parser.marks[len(parser.marks)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + return yaml_parser_set_parser_error_context(parser, + "while parsing a flow sequence", context_mark, + "did not find expected ',' or ']'", token.start_mark) + } + } + + if token.typ == yaml_KEY_TOKEN { + parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE + *event = yaml_event_t{ + typ: yaml_MAPPING_START_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + event.mapping_start.implicit = true + event.mapping_start.style = yaml_FLOW_MAPPING_STYLE + skip_token(parser) + return true + } else if token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + } + + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + + *event = yaml_event_t{ + typ: yaml_SEQUENCE_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + + skip_token(parser) + return true +} + +// +// Parse the productions: +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 
+// *** * +// +func yaml_parser_parse_flow_sequence_entry_mapping_key(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_FLOW_ENTRY_TOKEN && + token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } else { + mark := token.end_mark + skip_token(parser) + parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE + return yaml_parser_process_empty_scalar(parser, event, mark) + } +} + +// Parse the productions: +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// ***** * +// +func yaml_parser_parse_flow_sequence_entry_mapping_value(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + if token.typ == yaml_VALUE_TOKEN { + skip_token(parser) + token := peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_FLOW_ENTRY_TOKEN && token.typ != yaml_FLOW_SEQUENCE_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + } + parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) +} + +// Parse the productions: +// flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// * +// +func yaml_parser_parse_flow_sequence_entry_mapping_end(parser *yaml_parser_t, event *yaml_event_t) bool { + token := peek_token(parser) + if token == nil { + return false + } + parser.state = yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE + *event = yaml_event_t{ + typ: yaml_MAPPING_END_EVENT, + start_mark: token.start_mark, + end_mark: token.start_mark, // [Go] Shouldn't this be end_mark? 
+ } + return true +} + +// Parse the productions: +// flow_mapping ::= FLOW-MAPPING-START +// ****************** +// (flow_mapping_entry FLOW-ENTRY)* +// * ********** +// flow_mapping_entry? +// ****************** +// FLOW-MAPPING-END +// **************** +// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// * *** * +// +func yaml_parser_parse_flow_mapping_key(parser *yaml_parser_t, event *yaml_event_t, first bool) bool { + if first { + token := peek_token(parser) + parser.marks = append(parser.marks, token.start_mark) + skip_token(parser) + } + + token := peek_token(parser) + if token == nil { + return false + } + + if token.typ != yaml_FLOW_MAPPING_END_TOKEN { + if !first { + if token.typ == yaml_FLOW_ENTRY_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } else { + context_mark := parser.marks[len(parser.marks)-1] + parser.marks = parser.marks[:len(parser.marks)-1] + return yaml_parser_set_parser_error_context(parser, + "while parsing a flow mapping", context_mark, + "did not find expected ',' or '}'", token.start_mark) + } + } + + if token.typ == yaml_KEY_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_VALUE_TOKEN && + token.typ != yaml_FLOW_ENTRY_TOKEN && + token.typ != yaml_FLOW_MAPPING_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_MAPPING_VALUE_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } else { + parser.state = yaml_PARSE_FLOW_MAPPING_VALUE_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) + } + } else if token.typ != yaml_FLOW_MAPPING_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + } + + parser.state = parser.states[len(parser.states)-1] + parser.states = parser.states[:len(parser.states)-1] + parser.marks = 
parser.marks[:len(parser.marks)-1] + *event = yaml_event_t{ + typ: yaml_MAPPING_END_EVENT, + start_mark: token.start_mark, + end_mark: token.end_mark, + } + skip_token(parser) + return true +} + +// Parse the productions: +// flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +// * ***** * +// +func yaml_parser_parse_flow_mapping_value(parser *yaml_parser_t, event *yaml_event_t, empty bool) bool { + token := peek_token(parser) + if token == nil { + return false + } + if empty { + parser.state = yaml_PARSE_FLOW_MAPPING_KEY_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) + } + if token.typ == yaml_VALUE_TOKEN { + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + if token.typ != yaml_FLOW_ENTRY_TOKEN && token.typ != yaml_FLOW_MAPPING_END_TOKEN { + parser.states = append(parser.states, yaml_PARSE_FLOW_MAPPING_KEY_STATE) + return yaml_parser_parse_node(parser, event, false, false) + } + } + parser.state = yaml_PARSE_FLOW_MAPPING_KEY_STATE + return yaml_parser_process_empty_scalar(parser, event, token.start_mark) +} + +// Generate an empty scalar event. +func yaml_parser_process_empty_scalar(parser *yaml_parser_t, event *yaml_event_t, mark yaml_mark_t) bool { + *event = yaml_event_t{ + typ: yaml_SCALAR_EVENT, + start_mark: mark, + end_mark: mark, + } + event.scalar.plain_implicit = true + event.scalar.style = yaml_PLAIN_SCALAR_STYLE + // Empty means len(event.scalar.value) == 0 + return true +} + +var default_tag_directives = []yaml_tag_directive_t{ + {[]byte("!"), []byte("!")}, + {[]byte("!!"), []byte("tag:yaml.org,2002:")}, +} + +// Parse directives. 
+func yaml_parser_process_directives(parser *yaml_parser_t, + version_directive_ref **yaml_version_directive_t, + tag_directives_ref *[]yaml_tag_directive_t) bool { + + var version_directive *yaml_version_directive_t + var tag_directives []yaml_tag_directive_t + + token := peek_token(parser) + if token == nil { + return false + } + + for token.typ == yaml_VERSION_DIRECTIVE_TOKEN || token.typ == yaml_TAG_DIRECTIVE_TOKEN { + if token.typ == yaml_VERSION_DIRECTIVE_TOKEN { + if version_directive != nil { + yaml_parser_set_parser_error(parser, + "found duplicate %YAML directive", token.start_mark) + return false + } + if token.version_directive.major != 1 || token.version_directive.minor != 1 { + yaml_parser_set_parser_error(parser, + "found incompatible YAML document", token.start_mark) + return false + } + version_directive = &yaml_version_directive_t{ + major: token.version_directive.major, + minor: token.version_directive.minor, + } + } else if token.typ == yaml_TAG_DIRECTIVE_TOKEN { + value := yaml_tag_directive_t{ + handle: token.tag_directive.handle, + prefix: token.tag_directive.prefix, + } + if !yaml_parser_append_tag_directive(parser, value, false, token.start_mark) { + return false + } + tag_directives = append(tag_directives, value) + } + + skip_token(parser) + token = peek_token(parser) + if token == nil { + return false + } + } + + for i := range default_tag_directives { + if !yaml_parser_append_tag_directive(parser, default_tag_directives[i], true, token.start_mark) { + return false + } + } + + if version_directive_ref != nil { + *version_directive_ref = version_directive + } + if tag_directives_ref != nil { + *tag_directives_ref = tag_directives + } + return true +} + +// Append a tag directive to the directives stack. 
+func yaml_parser_append_tag_directive(parser *yaml_parser_t, value yaml_tag_directive_t, allow_duplicates bool, mark yaml_mark_t) bool { + for i := range parser.tag_directives { + if bytes.Equal(value.handle, parser.tag_directives[i].handle) { + if allow_duplicates { + return true + } + return yaml_parser_set_parser_error(parser, "found duplicate %TAG directive", mark) + } + } + + // [Go] I suspect the copy is unnecessary. This was likely done + // because there was no way to track ownership of the data. + value_copy := yaml_tag_directive_t{ + handle: make([]byte, len(value.handle)), + prefix: make([]byte, len(value.prefix)), + } + copy(value_copy.handle, value.handle) + copy(value_copy.prefix, value.prefix) + parser.tag_directives = append(parser.tag_directives, value_copy) + return true +}
diff --git a/reader_c.go b/reader_c.go new file mode 100644 index 0000000..e3a8eae --- /dev/null +++ b/reader_c.go
@@ -0,0 +1,397 @@ +package goyaml + +import ( + "io" +) + +// Set the reader error and return 0. +func yaml_parser_set_reader_error(parser *yaml_parser_t, problem string, offset int, value int) bool { + parser.error = yaml_READER_ERROR + parser.problem = problem + parser.problem_offset = offset + parser.problem_value = value + return false +} + +// Byte order marks. +const ( + bom_UTF8 = "\xef\xbb\xbf" + bom_UTF16LE = "\xff\xfe" + bom_UTF16BE = "\xfe\xff" +) + +// Determine the input stream encoding by checking the BOM symbol. If no BOM is +// found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure. +func yaml_parser_determine_encoding(parser *yaml_parser_t) bool { + // Ensure that we had enough bytes in the raw buffer. + for !parser.eof && len(parser.raw_buffer)-parser.raw_buffer_pos < 3 { + if !yaml_parser_update_raw_buffer(parser) { + return false + } + } + + // Determine the encoding. + buf := parser.raw_buffer + pos := parser.raw_buffer_pos + avail := len(buf) - pos + if avail >= 2 && buf[pos] == bom_UTF16LE[0] && buf[pos+1] == bom_UTF16LE[1] { + parser.encoding = yaml_UTF16LE_ENCODING + parser.raw_buffer_pos += 2 + parser.offset += 2 + } else if avail >= 2 && buf[pos] == bom_UTF16BE[0] && buf[pos+1] == bom_UTF16BE[1] { + parser.encoding = yaml_UTF16BE_ENCODING + parser.raw_buffer_pos += 2 + parser.offset += 2 + } else if avail >= 3 && buf[pos] == bom_UTF8[0] && buf[pos+1] == bom_UTF8[1] && buf[pos+2] == bom_UTF8[2] { + parser.encoding = yaml_UTF8_ENCODING + parser.raw_buffer_pos += 3 + parser.offset += 3 + } else { + parser.encoding = yaml_UTF8_ENCODING + } + return true +} + +// Update the raw buffer. +func yaml_parser_update_raw_buffer(parser *yaml_parser_t) bool { + size_read := 0 + + // Return if the raw buffer is full. + if parser.raw_buffer_pos == 0 && len(parser.raw_buffer) == cap(parser.raw_buffer) { + return true + } + + // Return on EOF. 
+ if parser.eof { + return true + } + + // Move the remaining bytes in the raw buffer to the beginning. + if parser.raw_buffer_pos > 0 && parser.raw_buffer_pos < len(parser.raw_buffer) { + copy(parser.raw_buffer, parser.raw_buffer[parser.raw_buffer_pos:]) + } + parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)-parser.raw_buffer_pos] + parser.raw_buffer_pos = 0 + + // Call the read handler to fill the buffer. + size_read, err := parser.read_handler(parser, parser.raw_buffer[len(parser.raw_buffer):cap(parser.raw_buffer)]) + parser.raw_buffer = parser.raw_buffer[:len(parser.raw_buffer)+size_read] + if err == io.EOF { + parser.eof = true + } else if err != nil { + return yaml_parser_set_reader_error(parser, "input error: "+err.Error(), parser.offset, -1) + } + return true +} + +// Ensure that the buffer contains at least `length` characters. +// Return true on success, false on failure. +// +// The length is supposed to be significantly less that the buffer size. +func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool { + if parser.read_handler == nil { + panic("read handler must be set") + } + + // If the EOF flag is set and the raw buffer is empty, do nothing. + if parser.eof && parser.raw_buffer_pos == len(parser.raw_buffer) { + return true + } + + // Return if the buffer contains enough characters. + if parser.unread >= length { + return true + } + + // Determine the input encoding if it is not known yet. + if parser.encoding == yaml_ANY_ENCODING { + if !yaml_parser_determine_encoding(parser) { + return false + } + } + + // Move the unread characters to the beginning of the buffer. 
+ if parser.buffer_pos > 0 && parser.buffer_pos < len(parser.buffer) { + size := len(parser.buffer) - parser.buffer_pos + copy(parser.buffer, parser.buffer[parser.buffer_pos:]) + parser.buffer_pos = 0 + parser.buffer = parser.buffer[:size] + } else if parser.buffer_pos == len(parser.buffer) { + parser.buffer_pos = 0 + parser.buffer = parser.buffer[:0] + } + + // Fill the buffer until it has enough characters. + first := true + for parser.unread < length { + + // Fill the raw buffer if necessary. + if !first || parser.raw_buffer_pos == len(parser.raw_buffer) { + if !yaml_parser_update_raw_buffer(parser) { + return false + } + } + first = false + + // Decode the raw buffer. + for parser.raw_buffer_pos != len(parser.raw_buffer) { + var value, value2 rune + var incomplete bool + var width int + + raw_unread := len(parser.raw_buffer) - parser.raw_buffer_pos + + // Decode the next character. + switch parser.encoding { + case yaml_UTF8_ENCODING: + // Decode a UTF-8 character. Check RFC 3629 + // (http://www.ietf.org/rfc/rfc3629.txt) for more details. + // + // The following table (taken from the RFC) is used for + // decoding. + // + // Char. number range | UTF-8 octet sequence + // (hexadecimal) | (binary) + // --------------------+------------------------------------ + // 0000 0000-0000 007F | 0xxxxxxx + // 0000 0080-0000 07FF | 110xxxxx 10xxxxxx + // 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx + // 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + // + // Additionally, the characters in the range 0xD800-0xDFFF + // are prohibited as they are reserved for use with UTF-16 + // surrogate pairs. + + // Determine the length of the UTF-8 sequence. + octet := parser.raw_buffer[parser.raw_buffer_pos] + switch { + case octet&0x80 == 0x00: + width = 1 + case octet&0xE0 == 0xC0: + width = 2 + case octet&0xF0 == 0xE0: + width = 3 + case octet&0xF8 == 0xF0: + width = 4 + default: + // The leading octet is invalid. 
+ return yaml_parser_set_reader_error(parser, + "invalid leading UTF-8 octet", + parser.offset, int(octet)) + } + + // Check if the raw buffer contains an incomplete character. + if width > raw_unread { + if parser.eof { + return yaml_parser_set_reader_error(parser, + "incomplete UTF-8 octet sequence", + parser.offset, -1) + } + incomplete = true + break + } + + // Decode the leading octet. + switch { + case octet&0x80 == 0x00: + value = rune(octet & 0x7F) + case octet&0xE0 == 0xC0: + value = rune(octet & 0x1F) + case octet&0xF0 == 0xE0: + value = rune(octet & 0x0F) + case octet&0xF8 == 0xF0: + value = rune(octet & 0x07) + default: + value = 0 + } + + // Check and decode the trailing octets. + for k := 1; k < width; k++ { + octet = parser.raw_buffer[parser.raw_buffer_pos+k] + + // Check if the octet is valid. + if (octet & 0xC0) != 0x80 { + return yaml_parser_set_reader_error(parser, + "invalid trailing UTF-8 octet", + parser.offset+k, int(octet)) + } + + // Decode the octet. + value = (value << 6) + rune(octet&0x3F) + } + + // Check the length of the sequence against the value. + switch { + case width == 1: + case width == 2 && value >= 0x80: + case width == 3 && value >= 0x800: + case width == 4 && value >= 0x10000: + default: + return yaml_parser_set_reader_error(parser, + "invalid length of a UTF-8 sequence", + parser.offset, -1) + } + + // Check the range of the value. + if value >= 0xD800 && value <= 0xDFFF || value > 0x10FFFF { + return yaml_parser_set_reader_error(parser, + "invalid Unicode character", + parser.offset, int(value)) + } + + case yaml_UTF16LE_ENCODING, yaml_UTF16BE_ENCODING: + var low, high int + if parser.encoding == yaml_UTF16LE_ENCODING { + low, high = 0, 1 + } else { + high, low = 1, 0 + } + + // The UTF-16 encoding is not as simple as one might + // naively think. Check RFC 2781 + // (http://www.ietf.org/rfc/rfc2781.txt). + // + // Normally, two subsequent bytes describe a Unicode + // character. 
However a special technique (called a + // surrogate pair) is used for specifying character + // values larger than 0xFFFF. + // + // A surrogate pair consists of two pseudo-characters: + // high surrogate area (0xD800-0xDBFF) + // low surrogate area (0xDC00-0xDFFF) + // + // The following formulas are used for decoding + // and encoding characters using surrogate pairs: + // + // U = U' + 0x10000 (0x01 00 00 <= U <= 0x10 FF FF) + // U' = yyyyyyyyyyxxxxxxxxxx (0 <= U' <= 0x0F FF FF) + // W1 = 110110yyyyyyyyyy + // W2 = 110111xxxxxxxxxx + // + // where U is the character value, W1 is the high surrogate + // area, W2 is the low surrogate area. + + // Check for incomplete UTF-16 character. + if raw_unread < 2 { + if parser.eof { + return yaml_parser_set_reader_error(parser, + "incomplete UTF-16 character", + parser.offset, -1) + } + incomplete = true + break + } + + // Get the character. + value = rune(parser.raw_buffer[parser.raw_buffer_pos+low]) + + (rune(parser.raw_buffer[parser.raw_buffer_pos+high]) << 8) + + // Check for unexpected low surrogate area. + if value&0xFC00 == 0xDC00 { + return yaml_parser_set_reader_error(parser, + "unexpected low surrogate area", + parser.offset, int(value)) + } + + // Check for a high surrogate area. + if value&0xFC00 == 0xD800 { + width = 4 + + // Check for incomplete surrogate pair. + if raw_unread < 4 { + if parser.eof { + return yaml_parser_set_reader_error(parser, + "incomplete UTF-16 surrogate pair", + parser.offset, -1) + } + incomplete = true + break + } + + // Get the next character. + value2 = rune(parser.raw_buffer[parser.raw_buffer_pos+low+2]) + + (rune(parser.raw_buffer[parser.raw_buffer_pos+high+2]) << 8) + + // Check for a low surrogate area. + if value2&0xFC00 != 0xDC00 { + return yaml_parser_set_reader_error(parser, + "expected low surrogate area", + parser.offset+2, int(value2)) + } + + // Generate the value of the surrogate pair. 
+ value = 0x10000 + ((value & 0x3FF) << 10) + (value2 & 0x3FF) + } else { + width = 2 + } + + default: + panic("impossible") + } + + // Check if the raw buffer contains enough bytes to form a character. + if incomplete { + break + } + + // Check if the character is in the allowed range: + // #x9 | #xA | #xD | [#x20-#x7E] (8 bit) + // | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] (16 bit) + // | [#x10000-#x10FFFF] (32 bit) + switch { + case value == 0x09: + case value == 0x0A: + case value == 0x0D: + case value >= 0x20 && value <= 0x7E: + case value == 0x85: + case value >= 0xA0 && value <= 0xD7FF: + case value >= 0xE000 && value <= 0xFFFD: + case value >= 0x10000 && value <= 0x10FFFF: + default: + return yaml_parser_set_reader_error(parser, + "control characters are not allowed", + parser.offset, int(value)) + } + + // Move the raw pointers. + parser.raw_buffer_pos += width + parser.offset += width + + pos := len(parser.buffer) + parser.buffer = parser.buffer[:pos+width] + + // Finally put the character into the buffer. + if value <= 0x7F { + // 0000 0000-0000 007F . 0xxxxxxx + parser.buffer[pos+0] = byte(value) + } else if value <= 0x7FF { + // 0000 0080-0000 07FF . 110xxxxx 10xxxxxx + parser.buffer[pos+0] = byte(0xC0 + (value >> 6)) + parser.buffer[pos+1] = byte(0x80 + (value & 0x3F)) + } else if value <= 0xFFFF { + // 0000 0800-0000 FFFF . 1110xxxx 10xxxxxx 10xxxxxx + parser.buffer[pos+0] = byte(0xE0 + (value >> 12)) + parser.buffer[pos+1] = byte(0x80 + ((value >> 6) & 0x3F)) + parser.buffer[pos+2] = byte(0x80 + (value & 0x3F)) + } else { + // 0001 0000-0010 FFFF . 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + parser.buffer[pos+0] = byte(0xF0 + (value >> 18)) + parser.buffer[pos+1] = byte(0x80 + ((value >> 12) & 0x3F)) + parser.buffer[pos+2] = byte(0x80 + ((value >> 6) & 0x3F)) + parser.buffer[pos+3] = byte(0x80 + (value & 0x3F)) + } + + parser.unread++ + } + + // On EOF, put NUL into the buffer and return. 
+ if parser.eof { + parser.buffer = parser.buffer[:len(parser.buffer)+1] + parser.buffer[len(parser.buffer)-1] = 0x00 + parser.unread++ + return true + } + } + + return true +}
diff --git a/scanner_c.go b/scanner_c.go new file mode 100644 index 0000000..e926272 --- /dev/null +++ b/scanner_c.go
@@ -0,0 +1,2728 @@ +package goyaml + +import ( + "bytes" +) + +// Introduction +// ************ +// +// The following notes assume that you are familiar with the YAML specification +// (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in +// some cases we are less restrictive than it requires. +// +// The process of transforming a YAML stream into a sequence of events is +// divided into two steps: Scanning and Parsing. +// +// The Scanner transforms the input stream into a sequence of tokens, while the +// Parser transforms the sequence of tokens produced by the Scanner into a +// sequence of parsing events. +// +// The Scanner is rather clever and complicated. The Parser, on the contrary, +// is a straightforward implementation of a recursive-descent parser (or, +// LL(1) parser, as it is usually called). +// +// Actually there are two issues of Scanning that might be called "clever", the +// rest is quite straightforward. The issues are "block collection start" and +// "simple keys". Both issues are explained below in detail. +// +// Here the Scanning step is explained and implemented. We start with the list +// of all the tokens produced by the Scanner together with short descriptions. +// +// Now, tokens: +// +// STREAM-START(encoding) # The stream start. +// STREAM-END # The stream end. +// VERSION-DIRECTIVE(major,minor) # The '%YAML' directive. +// TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive. +// DOCUMENT-START # '---' +// DOCUMENT-END # '...' +// BLOCK-SEQUENCE-START # Indentation increase denoting a block +// BLOCK-MAPPING-START # sequence or a block mapping. +// BLOCK-END # Indentation decrease. +// FLOW-SEQUENCE-START # '[' +// FLOW-SEQUENCE-END # ']' +// FLOW-MAPPING-START # '{' +// FLOW-MAPPING-END # '}' +// BLOCK-ENTRY # '-' +// FLOW-ENTRY # ',' +// KEY # '?' or nothing (simple keys). 
+// VALUE # ':' +// ALIAS(anchor) # '*anchor' +// ANCHOR(anchor) # '&anchor' +// TAG(handle,suffix) # '!handle!suffix' +// SCALAR(value,style) # A scalar. +// +// The following two tokens are "virtual" tokens denoting the beginning and the +// end of the stream: +// +// STREAM-START(encoding) +// STREAM-END +// +// We pass the information about the input stream encoding with the +// STREAM-START token. +// +// The next two tokens are responsible for directives: +// +// VERSION-DIRECTIVE(major,minor) +// TAG-DIRECTIVE(handle,prefix) +// +// Example: +// +// %YAML 1.1 +// %TAG ! !foo +// %TAG !yaml! tag:yaml.org,2002: +// --- +// +// The corresponding sequence of tokens: +// +// STREAM-START(utf-8) +// VERSION-DIRECTIVE(1,1) +// TAG-DIRECTIVE("!","!foo") +// TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:") +// DOCUMENT-START +// STREAM-END +// +// Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole +// line. +// +// The document start and end indicators are represented by: +// +// DOCUMENT-START +// DOCUMENT-END +// +// Note that if a YAML stream contains an implicit document (without '---' +// and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be +// produced. +// +// In the following examples, we present whole documents together with the +// produced tokens. +// +// 1. An implicit document: +// +// 'a scalar' +// +// Tokens: +// +// STREAM-START(utf-8) +// SCALAR("a scalar",single-quoted) +// STREAM-END +// +// 2. An explicit document: +// +// --- +// 'a scalar' +// ... +// +// Tokens: +// +// STREAM-START(utf-8) +// DOCUMENT-START +// SCALAR("a scalar",single-quoted) +// DOCUMENT-END +// STREAM-END +// +// 3. 
Several documents in a stream: +// +// 'a scalar' +// --- +// 'another scalar' +// --- +// 'yet another scalar' +// +// Tokens: +// +// STREAM-START(utf-8) +// SCALAR("a scalar",single-quoted) +// DOCUMENT-START +// SCALAR("another scalar",single-quoted) +// DOCUMENT-START +// SCALAR("yet another scalar",single-quoted) +// STREAM-END +// +// We have already introduced the SCALAR token above. The following tokens are +// used to describe aliases, anchors, tag, and scalars: +// +// ALIAS(anchor) +// ANCHOR(anchor) +// TAG(handle,suffix) +// SCALAR(value,style) +// +// The following series of examples illustrate the usage of these tokens: +// +// 1. A recursive sequence: +// +// &A [ *A ] +// +// Tokens: +// +// STREAM-START(utf-8) +// ANCHOR("A") +// FLOW-SEQUENCE-START +// ALIAS("A") +// FLOW-SEQUENCE-END +// STREAM-END +// +// 2. A tagged scalar: +// +// !!float "3.14" # A good approximation. +// +// Tokens: +// +// STREAM-START(utf-8) +// TAG("!!","float") +// SCALAR("3.14",double-quoted) +// STREAM-END +// +// 3. Various scalar styles: +// +// --- # Implicit empty plain scalars do not produce tokens. +// --- a plain scalar +// --- 'a single-quoted scalar' +// --- "a double-quoted scalar" +// --- |- +// a literal scalar +// --- >- +// a folded +// scalar +// +// Tokens: +// +// STREAM-START(utf-8) +// DOCUMENT-START +// DOCUMENT-START +// SCALAR("a plain scalar",plain) +// DOCUMENT-START +// SCALAR("a single-quoted scalar",single-quoted) +// DOCUMENT-START +// SCALAR("a double-quoted scalar",double-quoted) +// DOCUMENT-START +// SCALAR("a literal scalar",literal) +// DOCUMENT-START +// SCALAR("a folded scalar",folded) +// STREAM-END +// +// Now it's time to review collection-related tokens. 
We will start with +// flow collections: +// +// FLOW-SEQUENCE-START +// FLOW-SEQUENCE-END +// FLOW-MAPPING-START +// FLOW-MAPPING-END +// FLOW-ENTRY +// KEY +// VALUE +// +// The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and +// FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}' +// correspondingly. FLOW-ENTRY represent the ',' indicator. Finally the +// indicators '?' and ':', which are used for denoting mapping keys and values, +// are represented by the KEY and VALUE tokens. +// +// The following examples show flow collections: +// +// 1. A flow sequence: +// +// [item 1, item 2, item 3] +// +// Tokens: +// +// STREAM-START(utf-8) +// FLOW-SEQUENCE-START +// SCALAR("item 1",plain) +// FLOW-ENTRY +// SCALAR("item 2",plain) +// FLOW-ENTRY +// SCALAR("item 3",plain) +// FLOW-SEQUENCE-END +// STREAM-END +// +// 2. A flow mapping: +// +// { +// a simple key: a value, # Note that the KEY token is produced. +// ? a complex key: another value, +// } +// +// Tokens: +// +// STREAM-START(utf-8) +// FLOW-MAPPING-START +// KEY +// SCALAR("a simple key",plain) +// VALUE +// SCALAR("a value",plain) +// FLOW-ENTRY +// KEY +// SCALAR("a complex key",plain) +// VALUE +// SCALAR("another value",plain) +// FLOW-ENTRY +// FLOW-MAPPING-END +// STREAM-END +// +// A simple key is a key which is not denoted by the '?' indicator. Note that +// the Scanner still produce the KEY token whenever it encounters a simple key. +// +// For scanning block collections, the following tokens are used (note that we +// repeat KEY and VALUE here): +// +// BLOCK-SEQUENCE-START +// BLOCK-MAPPING-START +// BLOCK-END +// BLOCK-ENTRY +// KEY +// VALUE +// +// The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation +// increase that precedes a block collection (cf. the INDENT token in Python). +// The token BLOCK-END denote indentation decrease that ends a block collection +// (cf. the DEDENT token in Python). 
However YAML has some syntax pecularities +// that makes detections of these tokens more complex. +// +// The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators +// '-', '?', and ':' correspondingly. +// +// The following examples show how the tokens BLOCK-SEQUENCE-START, +// BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner: +// +// 1. Block sequences: +// +// - item 1 +// - item 2 +// - +// - item 3.1 +// - item 3.2 +// - +// key 1: value 1 +// key 2: value 2 +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-ENTRY +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 3.1",plain) +// BLOCK-ENTRY +// SCALAR("item 3.2",plain) +// BLOCK-END +// BLOCK-ENTRY +// BLOCK-MAPPING-START +// KEY +// SCALAR("key 1",plain) +// VALUE +// SCALAR("value 1",plain) +// KEY +// SCALAR("key 2",plain) +// VALUE +// SCALAR("value 2",plain) +// BLOCK-END +// BLOCK-END +// STREAM-END +// +// 2. Block mappings: +// +// a simple key: a value # The KEY token is produced here. +// ? a complex key +// : another value +// a mapping: +// key 1: value 1 +// key 2: value 2 +// a sequence: +// - item 1 +// - item 2 +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-MAPPING-START +// KEY +// SCALAR("a simple key",plain) +// VALUE +// SCALAR("a value",plain) +// KEY +// SCALAR("a complex key",plain) +// VALUE +// SCALAR("another value",plain) +// KEY +// SCALAR("a mapping",plain) +// BLOCK-MAPPING-START +// KEY +// SCALAR("key 1",plain) +// VALUE +// SCALAR("value 1",plain) +// KEY +// SCALAR("key 2",plain) +// VALUE +// SCALAR("value 2",plain) +// BLOCK-END +// KEY +// SCALAR("a sequence",plain) +// VALUE +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-END +// BLOCK-END +// STREAM-END +// +// YAML does not always require to start a new block collection from a new +// line. 
If the current line contains only '-', '?', and ':' indicators, a new +// block collection may start at the current line. The following examples +// illustrate this case: +// +// 1. Collections in a sequence: +// +// - - item 1 +// - item 2 +// - key 1: value 1 +// key 2: value 2 +// - ? complex key +// : complex value +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-END +// BLOCK-ENTRY +// BLOCK-MAPPING-START +// KEY +// SCALAR("key 1",plain) +// VALUE +// SCALAR("value 1",plain) +// KEY +// SCALAR("key 2",plain) +// VALUE +// SCALAR("value 2",plain) +// BLOCK-END +// BLOCK-ENTRY +// BLOCK-MAPPING-START +// KEY +// SCALAR("complex key") +// VALUE +// SCALAR("complex value") +// BLOCK-END +// BLOCK-END +// STREAM-END +// +// 2. Collections in a mapping: +// +// ? a sequence +// : - item 1 +// - item 2 +// ? a mapping +// : key 1: value 1 +// key 2: value 2 +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-MAPPING-START +// KEY +// SCALAR("a sequence",plain) +// VALUE +// BLOCK-SEQUENCE-START +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-END +// KEY +// SCALAR("a mapping",plain) +// VALUE +// BLOCK-MAPPING-START +// KEY +// SCALAR("key 1",plain) +// VALUE +// SCALAR("value 1",plain) +// KEY +// SCALAR("key 2",plain) +// VALUE +// SCALAR("value 2",plain) +// BLOCK-END +// BLOCK-END +// STREAM-END +// +// YAML also permits non-indented sequences if they are included into a block +// mapping. In this case, the token BLOCK-SEQUENCE-START is not produced: +// +// key: +// - item 1 # BLOCK-SEQUENCE-START is NOT produced here. 
+// - item 2 +// +// Tokens: +// +// STREAM-START(utf-8) +// BLOCK-MAPPING-START +// KEY +// SCALAR("key",plain) +// VALUE +// BLOCK-ENTRY +// SCALAR("item 1",plain) +// BLOCK-ENTRY +// SCALAR("item 2",plain) +// BLOCK-END +// + +// Ensure that the buffer contains the required number of characters. +// Return 1 on success, 0 on failure (reader error or memory error). +func cache(parser *yaml_parser_t, length int) bool { + return parser.unread >= length || yaml_parser_update_buffer(parser, length) +} + +// Advance the buffer pointer. +func skip(parser *yaml_parser_t) { + parser.mark.index++ + parser.mark.column++ + parser.unread-- + parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) +} + +func skip_line(parser *yaml_parser_t) { + if is_crlf(parser.buffer, parser.buffer_pos) { + parser.mark.index += 2 + parser.mark.column = 0 + parser.mark.line++ + parser.unread -= 2 + parser.buffer_pos += 2 + } else if is_break(parser.buffer, parser.buffer_pos) { + parser.mark.index++ + parser.mark.column = 0 + parser.mark.line++ + parser.unread-- + parser.buffer_pos += width(parser.buffer[parser.buffer_pos]) + } +} + +// Copy a character to a string buffer and advance pointers. +func read(parser *yaml_parser_t, s *[]byte) bool { + buf := parser.buffer + pos := parser.buffer_pos + w := width(buf[0]) + *s = append(*s, buf[pos:pos+w]...) + parser.buffer_pos += w + parser.mark.index++ + parser.mark.column++ + parser.unread-- + return true +} + +// Copy a line break character to a string buffer and advance pointers. +func read_line(parser *yaml_parser_t, s *[]byte) bool { + buf := parser.buffer + pos := parser.buffer_pos + switch { + case buf[pos] == '\r' && buf[pos+1] == '\n': + // CR LF . LF + *s = append(*s, '\n') + parser.buffer_pos += 2 + parser.mark.index++ + parser.unread-- + case buf[pos] == '\r' || buf[pos+1] == '\n': + // CR|LF . LF + *s = append(*s, '\n') + parser.buffer_pos += 1 + case buf[pos] == '\xC2' && buf[pos+1] == '\x85': + // NEL . 
LF + *s = append(*s, '\n') + parser.buffer_pos += 2 + case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'): + // LS|PS . LS|PS + *s = append(*s, buf[parser.buffer_pos:pos+3]...) + parser.buffer_pos += 3 + default: + return false + } + parser.mark.index++ + parser.mark.column = 0 + parser.mark.line++ + parser.unread-- + return true +} + +// Get the next token. +func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool { + // Erase the token object. + *token = yaml_token_t{} // [Go] Is this necessary? + + // No tokens after STREAM-END or error. + if parser.stream_end_produced || parser.error != yaml_NO_ERROR { + return true + } + + // Ensure that the tokens queue contains enough tokens. + if !parser.token_available { + if !yaml_parser_fetch_more_tokens(parser) { + return false + } + } + + // Fetch the next token from the queue. + *token = parser.tokens[parser.tokens_head] + parser.tokens_head++ + parser.tokens_parsed++ + parser.token_available = false + + if token.typ == yaml_STREAM_END_TOKEN { + parser.stream_end_produced = true + } + return true +} + +// Set the scanner error and return false. +func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool { + parser.error = yaml_SCANNER_ERROR + parser.context = context + parser.context_mark = context_mark + parser.problem = problem + parser.problem_mark = parser.mark + return false +} + +func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool { + context := "while parsing a tag" + if directive { + context = "while parsing a %TAG directive" + } + return yaml_parser_set_scanner_error(parser, context, context_mark, "did not find URI escaped octet") +} + +// Ensure that the tokens queue contains at least one token which can be +// returned to the Parser. 
+func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { + // While we need more tokens to fetch, do it. + for { + // Check if we really need to fetch more tokens. + need_more_tokens := false + + if parser.tokens_head == len(parser.tokens) { + // Queue is empty. + need_more_tokens = true + } else { + // Check if any potential simple key may occupy the head position. + if !yaml_parser_stale_simple_keys(parser) { + return false + } + + for _, simple_key := range parser.simple_keys { + if simple_key.possible && simple_key.token_number == parser.tokens_parsed { + need_more_tokens = true + break + } + } + } + + // We are finished. + if !need_more_tokens { + break + } + // Fetch the next token. + if !yaml_parser_fetch_next_token(parser) { + return false + } + } + + parser.token_available = true + return true +} + +// The dispatcher for token fetchers. +func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { + + // Ensure that the buffer is initialized. + if !cache(parser, 1) { + return false + } + + // Check if we just started scanning. Fetch STREAM-START then. + if !parser.stream_start_produced { + return yaml_parser_fetch_stream_start(parser) + } + + // Eat whitespaces and comments until we reach the next token. + if !yaml_parser_scan_to_next_token(parser) { + return false + } + + // Remove obsolete potential simple keys. + if !yaml_parser_stale_simple_keys(parser) { + return false + } + + // Check the indentation level against the current column. + if !yaml_parser_unroll_indent(parser, parser.mark.column) { + return false + } + + // Ensure that the buffer contains at least 4 characters. 4 is the length + // of the longest indicators ('--- ' and '... '). + if !cache(parser, 4) { + return false + } + + // Is it the end of the stream? + if is_z(parser.buffer, parser.buffer_pos) { + return yaml_parser_fetch_stream_end(parser) + } + + // Is it a directive? 
+ if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' { + return yaml_parser_fetch_directive(parser) + } + + buf := parser.buffer + pos := parser.buffer_pos + + // Is it the document start indicator? + if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) { + return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN) + } + + // Is it the document end indicator? + if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) { + return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN) + } + + // Is it the flow sequence start indicator? + if buf[pos] == '[' { + return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN) + } + + // Is it the flow mapping start indicator? + if parser.buffer[parser.buffer_pos] == '{' { + return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN) + } + + // Is it the flow sequence end indicator? + if parser.buffer[parser.buffer_pos] == ']' { + return yaml_parser_fetch_flow_collection_end(parser, + yaml_FLOW_SEQUENCE_END_TOKEN) + } + + // Is it the flow mapping end indicator? + if parser.buffer[parser.buffer_pos] == '}' { + return yaml_parser_fetch_flow_collection_end(parser, + yaml_FLOW_MAPPING_END_TOKEN) + } + + // Is it the flow entry indicator? + if parser.buffer[parser.buffer_pos] == ',' { + return yaml_parser_fetch_flow_entry(parser) + } + + // Is it the block entry indicator? + if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) { + return yaml_parser_fetch_block_entry(parser) + } + + // Is it the key indicator? + if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { + return yaml_parser_fetch_key(parser) + } + + // Is it the value indicator? 
+ if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) { + return yaml_parser_fetch_value(parser) + } + + // Is it an alias? + if parser.buffer[parser.buffer_pos] == '*' { + return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN) + } + + // Is it an anchor? + if parser.buffer[parser.buffer_pos] == '&' { + return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN) + } + + // Is it a tag? + if parser.buffer[parser.buffer_pos] == '!' { + return yaml_parser_fetch_tag(parser) + } + + // Is it a literal scalar? + if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 { + return yaml_parser_fetch_block_scalar(parser, true) + } + + // Is it a folded scalar? + if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 { + return yaml_parser_fetch_block_scalar(parser, false) + } + + // Is it a single-quoted scalar? + if parser.buffer[parser.buffer_pos] == '\'' { + return yaml_parser_fetch_flow_scalar(parser, true) + } + + // Is it a double-quoted scalar? + if parser.buffer[parser.buffer_pos] == '"' { + return yaml_parser_fetch_flow_scalar(parser, false) + } + + // Is it a plain scalar? + // + // A plain scalar may start with any non-blank characters except + // + // '-', '?', ':', ',', '[', ']', '{', '}', + // '#', '&', '*', '!', '|', '>', '\'', '\"', + // '%', '@', '`'. + // + // In the block context (and, for the '-' indicator, in the flow context + // too), it may also start with the characters + // + // '-', '?', ':' + // + // if it is followed by a non-space character. + // + // The last rule is more restrictive than the specification requires. + // [Go] Make this logic more reasonable. + if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' || + parser.buffer[parser.buffer_pos] == '?' 
|| parser.buffer[parser.buffer_pos] == ':' || + parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' || + parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || + parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' || + parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' || + parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '|' || + parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' || + parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' || + parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') || + (parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) || + (parser.flow_level == 0 && + (parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') && + !is_blankz(parser.buffer, parser.buffer_pos+1)) { + return yaml_parser_fetch_plain_scalar(parser) + } + + // If we don't determine the token type so far, it is an error. + return yaml_parser_set_scanner_error(parser, + "while scanning for the next token", parser.mark, + "found character that cannot start any token") +} + +// Check the list of potential simple keys and remove the positions that +// cannot contain simple keys anymore. +func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool { + // Check for a potential simple key for each flow level. + for _, simple_key := range parser.simple_keys { + + // The specification requires that a simple key + // + // - is limited to a single line, + // - is shorter than 1024 characters. + if simple_key.possible && (simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index) { + + // Check if the potential simple key to be removed is required. 
+ if simple_key.required { + return yaml_parser_set_scanner_error(parser, + "while scanning a simple key", simple_key.mark, + "could not find expected ':'") + } + simple_key.possible = false + } + } + return true +} + +// Check if a simple key may start at the current position and add it if +// needed. +func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { + // A simple key is required at the current position if the scanner is in + // the block context and the current column coincides with the indentation + // level. + + required := parser.flow_level == 0 && parser.indent == parser.mark.column + + // A simple key is required only when it is the first token in the current + // line. Therefore it is always allowed. But we add a check anyway. + if required && !parser.simple_key_allowed { + panic("should not happen") + } + + // + // If the current position may start a simple key, save it. + // + if parser.simple_key_allowed { + simple_key := yaml_simple_key_t{ + possible: true, + required: required, + token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), + } + simple_key.mark = parser.mark + + if !yaml_parser_remove_simple_key(parser) { + return false + } + parser.simple_keys[len(parser.simple_keys)-1] = simple_key + } + return true +} + +// Remove a potential simple key at the current flow level. +func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { + i := len(parser.simple_keys) - 1 + if parser.simple_keys[i].possible { + // If the key is required, it is an error. + if parser.simple_keys[i].required { + return yaml_parser_set_scanner_error(parser, + "while scanning a simple key", parser.simple_keys[i].mark, + "could not find expected ':'") + } + } + // Remove the key from the stack. + parser.simple_keys[i].possible = false + return true +} + +// Increase the flow level and resize the simple key list if needed. +func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { + // Reset the simple key on the next level. 
+ parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) + + // Increase the flow level. + parser.flow_level++ + return true +} + +// Decrease the flow level. +func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { + if parser.flow_level > 0 { + parser.flow_level-- + parser.simple_keys = parser.simple_keys[:len(parser.simple_keys)-1] + } + return true +} + +// Push the current indentation level to the stack and set the new level +// the current column is greater than the indentation level. In this case, +// append or insert the specified token into the token queue. +func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool { + // In the flow context, do nothing. + if parser.flow_level > 0 { + return true + } + + if parser.indent < column { + // Push the current indentation level to the stack and set the new + // indentation level. + parser.indents = append(parser.indents, parser.indent) + parser.indent = column + + // Create a token and insert it into the queue. + token := yaml_token_t{ + typ: typ, + start_mark: mark, + end_mark: mark, + } + if number > -1 { + number -= parser.tokens_parsed + } + yaml_insert_token(parser, number, &token) + } + return true +} + +// Pop indentation levels from the indents stack until the current level +// becomes less or equal to the column. For each intendation level, append +// the BLOCK-END token. +func yaml_parser_unroll_indent(parser *yaml_parser_t, column int) bool { + // In the flow context, do nothing. + if parser.flow_level > 0 { + return true + } + + // Loop through the intendation levels in the stack. + for parser.indent > column { + // Create a token and append it to the queue. + token := yaml_token_t{ + typ: yaml_BLOCK_END_TOKEN, + start_mark: parser.mark, + end_mark: parser.mark, + } + yaml_insert_token(parser, -1, &token) + + // Pop the indentation level. 
+ parser.indent = parser.indents[len(parser.indents)-1] + parser.indents = parser.indents[:len(parser.indents)-1] + } + return true +} + +// Initialize the scanner and produce the STREAM-START token. +func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool { + // Set the initial indentation. + parser.indent = -1 + + // Initialize the simple key stack. + parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) + + // A simple key is allowed at the beginning of the stream. + parser.simple_key_allowed = true + + // We have started. + parser.stream_start_produced = true + + // Create the STREAM-START token and append it to the queue. + token := yaml_token_t{ + typ: yaml_STREAM_START_TOKEN, + start_mark: parser.mark, + end_mark: parser.mark, + } + token.stream_start.encoding = parser.encoding + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the STREAM-END token and shut down the scanner. +func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool { + // Force new line. + if parser.mark.column != 0 { + parser.mark.column = 0 + parser.mark.line++ + } + + // Reset the indentation level. + if !yaml_parser_unroll_indent(parser, -1) { + return false + } + + // Reset simple keys. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + parser.simple_key_allowed = false + + // Create the STREAM-END token and append it to the queue. + token := yaml_token_t{ + typ: yaml_STREAM_END_TOKEN, + start_mark: parser.mark, + end_mark: parser.mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token. +func yaml_parser_fetch_directive(parser *yaml_parser_t) bool { + // Reset the indentation level. + if !yaml_parser_unroll_indent(parser, -1) { + return false + } + + // Reset simple keys. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + parser.simple_key_allowed = false + + // Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. 
+ token := yaml_token_t{} + if !yaml_parser_scan_directive(parser, &token) { + return false + } + // Append the token to the queue. + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the DOCUMENT-START or DOCUMENT-END token. +func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool { + // Reset the indentation level. + if !yaml_parser_unroll_indent(parser, -1) { + return false + } + + // Reset simple keys. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + parser.simple_key_allowed = false + + // Consume the token. + start_mark := parser.mark + + skip(parser) + skip(parser) + skip(parser) + + end_mark := parser.mark + + // Create the DOCUMENT-START or DOCUMENT-END token. + token := yaml_token_t{ + typ: typ, + start_mark: start_mark, + end_mark: end_mark, + } + // Append the token to the queue. + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. +func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool { + // The indicators '[' and '{' may start a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // Increase the flow level. + if !yaml_parser_increase_flow_level(parser) { + return false + } + + // A simple key may follow the indicators '[' and '{'. + parser.simple_key_allowed = true + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. + token := yaml_token_t{ + typ: typ, + start_mark: start_mark, + end_mark: end_mark, + } + // Append the token to the queue. + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. +func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool { + // Reset any potential simple key on the current flow level. 
+ if !yaml_parser_remove_simple_key(parser) { + return false + } + + // Decrease the flow level. + if !yaml_parser_decrease_flow_level(parser) { + return false + } + + // No simple keys after the indicators ']' and '}'. + parser.simple_key_allowed = false + + // Consume the token. + + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. + token := yaml_token_t{ + typ: typ, + start_mark: start_mark, + end_mark: end_mark, + } + // Append the token to the queue. + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the FLOW-ENTRY token. +func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool { + // Reset any potential simple keys on the current flow level. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // Simple keys are allowed after ','. + parser.simple_key_allowed = true + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the FLOW-ENTRY token and append it to the queue. + token := yaml_token_t{ + typ: yaml_FLOW_ENTRY_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the BLOCK-ENTRY token. +func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool { + // Check if the scanner is in the block context. + if parser.flow_level == 0 { + // Check if we are allowed to start a new entry. + if !parser.simple_key_allowed { + return yaml_parser_set_scanner_error(parser, "", parser.mark, + "block sequence entries are not allowed in this context") + } + // Add the BLOCK-SEQUENCE-START token if needed. + if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) { + return false + } + } else { + // It is an error for the '-' indicator to occur in the flow context, + // but we let the Parser detect and report about it because the Parser + // is able to point to the context. 
+ } + + // Reset any potential simple keys on the current flow level. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // Simple keys are allowed after '-'. + parser.simple_key_allowed = true + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the BLOCK-ENTRY token and append it to the queue. + token := yaml_token_t{ + typ: yaml_BLOCK_ENTRY_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the KEY token. +func yaml_parser_fetch_key(parser *yaml_parser_t) bool { + + // In the block context, additional checks are required. + if parser.flow_level == 0 { + // Check if we are allowed to start a new key (not nessesary simple). + if !parser.simple_key_allowed { + return yaml_parser_set_scanner_error(parser, "", parser.mark, + "mapping keys are not allowed in this context") + } + // Add the BLOCK-MAPPING-START token if needed. + if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { + return false + } + } + + // Reset any potential simple keys on the current flow level. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // Simple keys are allowed after '?' in the block context. + parser.simple_key_allowed = parser.flow_level == 0 + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the KEY token and append it to the queue. + token := yaml_token_t{ + typ: yaml_KEY_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the VALUE token. +func yaml_parser_fetch_value(parser *yaml_parser_t) bool { + + simple_key := &parser.simple_keys[len(parser.simple_keys)-1] + + // Have we found a simple key? + if simple_key.possible { + // Create the KEY token and insert it into the queue. 
+ token := yaml_token_t{ + typ: yaml_KEY_TOKEN, + start_mark: simple_key.mark, + end_mark: simple_key.mark, + } + yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token) + + // In the block context, we may need to add the BLOCK-MAPPING-START token. + if !yaml_parser_roll_indent(parser, simple_key.mark.column, + simple_key.token_number, + yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) { + return false + } + + // Remove the simple key. + simple_key.possible = false + + // A simple key cannot follow another simple key. + parser.simple_key_allowed = false + + } else { + // The ':' indicator follows a complex key. + + // In the block context, extra checks are required. + if parser.flow_level == 0 { + + // Check if we are allowed to start a complex value. + if !parser.simple_key_allowed { + return yaml_parser_set_scanner_error(parser, "", parser.mark, + "mapping values are not allowed in this context") + } + + // Add the BLOCK-MAPPING-START token if needed. + if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) { + return false + } + } + + // Simple keys after ':' are allowed in the block context. + parser.simple_key_allowed = parser.flow_level == 0 + } + + // Consume the token. + start_mark := parser.mark + skip(parser) + end_mark := parser.mark + + // Create the VALUE token and append it to the queue. + token := yaml_token_t{ + typ: yaml_VALUE_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the ALIAS or ANCHOR token. +func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool { + // An anchor or an alias could be a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // A simple key cannot follow an anchor or an alias. + parser.simple_key_allowed = false + + // Create the ALIAS or ANCHOR token and append it to the queue. 
+ var token yaml_token_t + if !yaml_parser_scan_anchor(parser, &token, typ) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the TAG token. +func yaml_parser_fetch_tag(parser *yaml_parser_t) bool { + // A tag could be a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // A simple key cannot follow a tag. + parser.simple_key_allowed = false + + // Create the TAG token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_tag(parser, &token) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. +func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool { + // Remove any potential simple keys. + if !yaml_parser_remove_simple_key(parser) { + return false + } + + // A simple key may follow a block scalar. + parser.simple_key_allowed = true + + // Create the SCALAR token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_block_scalar(parser, &token, literal) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. +func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool { + // A plain scalar could be a simple key. + if !yaml_parser_save_simple_key(parser) { + return false + } + + // A simple key cannot follow a flow scalar. + parser.simple_key_allowed = false + + // Create the SCALAR token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_flow_scalar(parser, &token, single) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Produce the SCALAR(...,plain) token. +func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool { + // A plain scalar could be a simple key. 
+ if !yaml_parser_save_simple_key(parser) { + return false + } + + // A simple key cannot follow a flow scalar. + parser.simple_key_allowed = false + + // Create the SCALAR token and append it to the queue. + var token yaml_token_t + if !yaml_parser_scan_plain_scalar(parser, &token) { + return false + } + yaml_insert_token(parser, -1, &token) + return true +} + +// Eat whitespaces and comments until the next token is found. +func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool { + + // Until the next token is not found. + for { + // Allow the BOM mark to start a line. + if !cache(parser, 1) { + return false + } + if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) { + skip(parser) + } + + // Eat whitespaces. + // Tabs are allowed: + // - in the flow context + // - in the block context, but not at the beginning of the line or + // after '-', '?', or ':' (complex value). + if !cache(parser, 1) { + return false + } + + for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') { + skip(parser) + if !cache(parser, 1) { + return false + } + } + + // Eat a comment until a line break. + if parser.buffer[parser.buffer_pos] == '#' { + for !is_breakz(parser.buffer, parser.buffer_pos) { + skip(parser) + if !cache(parser, 1) { + return false + } + } + } + + // If it is a line break, eat it. + if is_break(parser.buffer, parser.buffer_pos) { + if !cache(parser, 2) { + return false + } + skip_line(parser) + + // In the block context, a new line may start a simple key. + if parser.flow_level == 0 { + parser.simple_key_allowed = true + } + } else { + break // We have found a token. + } + } + + return true +} + +// Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. +// +// Scope: +// %YAML 1.1 # a comment \n +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// %TAG !yaml! 
tag:yaml.org,2002: \n +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// +func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool { + // Eat '%'. + start_mark := parser.mark + skip(parser) + + // Scan the directive name. + var name []byte + if !yaml_parser_scan_directive_name(parser, start_mark, &name) { + return false + } + + // Is it a YAML directive? + if bytes.Equal(name, []byte("YAML")) { + // Scan the VERSION directive value. + var major, minor int + if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) { + return false + } + end_mark := parser.mark + + // Create a VERSION-DIRECTIVE token. + *token = yaml_token_t{ + typ: yaml_VERSION_DIRECTIVE_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + token.version_directive.major = major + token.version_directive.minor = minor + + // Is it a TAG directive? + } else if bytes.Equal(name, []byte("TAG")) { + // Scan the TAG directive value. + var handle, prefix []byte + if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) { + return false + } + end_mark := parser.mark + + // Create a TAG-DIRECTIVE token. + *token = yaml_token_t{ + typ: yaml_TAG_DIRECTIVE_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + token.tag_directive.handle = handle + token.tag_directive.prefix = prefix + + // Unknown directive. + } else { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "found uknown directive name") + return false + } + + // Eat the rest of the line including any comments. + if !cache(parser, 1) { + return false + } + + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if !cache(parser, 1) { + return false + } + } + + if parser.buffer[parser.buffer_pos] == '#' { + for !is_breakz(parser.buffer, parser.buffer_pos) { + skip(parser) + if !cache(parser, 1) { + return false + } + } + } + + // Check if we are at the end of the line. 
+ if !is_breakz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "did not find expected comment or line break") + return false + } + + // Eat a line break. + if is_break(parser.buffer, parser.buffer_pos) { + if !cache(parser, 2) { + return false + } + skip_line(parser) + } + + return true +} + +// Scan the directive name. +// +// Scope: +// %YAML 1.1 # a comment \n +// ^^^^ +// %TAG !yaml! tag:yaml.org,2002: \n +// ^^^ +// +func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool { + // Consume the directive name. + if !cache(parser, 1) { + return false + } + + var s []byte + for is_alpha(parser.buffer, parser.buffer_pos) { + if !read(parser, &s) { + return false + } + if !cache(parser, 1) { + return false + } + } + + // Check if the name is empty. + if len(s) == 0 { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "could not find expected directive name") + return false + } + + // Check for an blank character after the name. + if !is_blankz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "found unexpected non-alphabetical character") + return false + } + *name = s + return true +} + +// Scan the value of VERSION-DIRECTIVE. +// +// Scope: +// %YAML 1.1 # a comment \n +// ^^^^^^ +func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int) bool { + // Eat whitespaces. + if !cache(parser, 1) { + return false + } + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if !cache(parser, 1) { + return false + } + } + + // Consume the major version number. + if !yaml_parser_scan_version_directive_number(parser, start_mark, major) { + return false + } + + // Eat '.'. + if parser.buffer[parser.buffer_pos] != '.' 
{ + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "did not find expected digit or '.' character") + } + + skip(parser) + + // Consume the minor version number. + if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) { + return false + } + return true +} + +const max_number_length = 9 + +// Scan the version number of VERSION-DIRECTIVE. +// +// Scope: +// %YAML 1.1 # a comment \n +// ^ +// %YAML 1.1 # a comment \n +// ^ +func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int) bool { + + // Repeat while the next character is digit. + if !cache(parser, 1) { + return false + } + var value, length int + for is_digit(parser.buffer, parser.buffer_pos) { + // Check if the number is too long. + length++ + if length > max_number_length { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "found extremely long version number") + } + value = value*10 + as_digit(parser.buffer, parser.buffer_pos) + skip(parser) + if !cache(parser, 1) { + return false + } + } + + // Check if the number was present. + if length == 0 { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "did not find expected version number") + } + *number = value + return true +} + +// Scan the value of a TAG-DIRECTIVE token. +// +// Scope: +// %TAG !yaml! tag:yaml.org,2002: \n +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// +func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool { + var handle_value, prefix_value []byte + + // Eat whitespaces. + if !cache(parser, 1) { + return false + } + + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if !cache(parser, 1) { + return false + } + } + + // Scan a handle. + if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) { + return false + } + + // Expect a whitespace. 
+ if !cache(parser, 1) { + return false + } + if !is_blank(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", + start_mark, "did not find expected whitespace") + return false + } + + // Eat whitespaces. + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if !cache(parser, 1) { + return false + } + } + + // Scan a prefix. + if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) { + return false + } + + // Expect a whitespace or line break. + if !cache(parser, 1) { + return false + } + if !is_blankz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", + start_mark, "did not find expected whitespace or line break") + return false + } + + *handle = handle_value + *prefix = prefix_value + return true +} + +func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool { + var s []byte + + // Eat the indicator character. + start_mark := parser.mark + skip(parser) + + // Consume the value. + if !cache(parser, 1) { + return false + } + + for is_alpha(parser.buffer, parser.buffer_pos) { + if !read(parser, &s) { + return false + } + if !cache(parser, 1) { + return false + } + } + + end_mark := parser.mark + + /* + * Check if length of the anchor is greater than 0 and it is followed by + * a whitespace character or one of the indicators: + * + * '?', ':', ',', ']', '}', '%', '@', '`'. + */ + + if len(s) == 0 || + !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' 
|| + parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' || + parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' || + parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' || + parser.buffer[parser.buffer_pos] == '`') { + context := "while scanning an alias" + if typ == yaml_ANCHOR_TOKEN { + context = "while scanning an anchor" + } + yaml_parser_set_scanner_error(parser, context, start_mark, + "did not find expected alphabetic or numeric character") + return false + } + + // Create a token. + *token = yaml_token_t{ + typ: typ, + start_mark: start_mark, + end_mark: end_mark, + } + // [Go] Just use a single field instead. + if typ == yaml_ANCHOR_TOKEN { + token.anchor.value = s + } else { + token.alias.value = s + } + + return true +} + +/* + * Scan a TAG token. + */ + +func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool { + var handle, suffix []byte + + start_mark := parser.mark + + // Check if the tag is in the canonical form. + if !cache(parser, 2) { + return false + } + + if parser.buffer[parser.buffer_pos+1] == '<' { + // Keep the handle as '' + + // Eat '!<' + skip(parser) + skip(parser) + + // Consume the tag value. + if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { + return false + } + + // Check for '>' and eat it. + if parser.buffer[parser.buffer_pos] != '>' { + yaml_parser_set_scanner_error(parser, "while scanning a tag", + start_mark, "did not find the expected '>'") + return false + } + + skip(parser) + } else { + // The tag has either the '!suffix' or the '!handle!suffix' form. + + // First, try to scan a handle. + if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) { + return false + } + + // Check if it is, indeed, handle. + if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' { + // Scan the suffix now. 
+ if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) { + return false + } + } else { + // It wasn't a handle after all. Scan the rest of the tag. + if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) { + return false + } + + // Set the handle to '!'. + handle = []byte{'!'} + + // A special case: the '!' tag. Set the handle to '' and the + // suffix to '!'. + if len(suffix) == 0 { + handle, suffix = suffix, handle + } + } + } + + // Check the character which ends the tag. + if !cache(parser, 1) { + return false + } + if !is_blankz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a tag", + start_mark, "did not find expected whitespace or line break") + return false + } + + end_mark := parser.mark + + // Create a token. + *token = yaml_token_t{ + typ: yaml_TAG_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + token.tag.handle = handle + token.tag.suffix = suffix + return true +} + +// Scan a tag handle. +func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool { + // Check the initial '!' character. + if !cache(parser, 1) { + return false + } + if parser.buffer[parser.buffer_pos] != '!' { + yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "did not find expected '!'") + return false + } + + var s []byte + + // Copy the '!' character. + if !read(parser, &s) { + return false + } + + // Copy all subsequent alphabetical and numerical characters. + if !cache(parser, 1) { + return false + } + for is_alpha(parser.buffer, parser.buffer_pos) { + if !read(parser, &s) { + return false + } + if !cache(parser, 1) { + return false + } + } + + // Check if the trailing character is '!' and copy it. + if parser.buffer[parser.buffer_pos] == '!' { + if !read(parser, &s) { + return false + } + } else { + // It's either the '!' tag or not really a tag handle. If it's a %TAG + // directive, it's an error. 
If it's a tag token, it must be a part of URI. + if directive && !(s[0] == '!' && s[1] == 0) { + yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "did not find expected '!'") + return false + } + } + + *handle = s + return true +} + +// Scan a tag. +func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool { + //size_t length = head ? strlen((char *)head) : 0 + var s []byte + + // Copy the head if needed. + // + // Note that we don't copy the leading '!' character. + if len(head) > 1 { + s = append(s, head[1:]...) + } + + // Scan the tag. + if !cache(parser, 1) { + return false + } + + // The set of characters that may appear in URI is as follows: + // + // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', + // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', + // '%'. + // [Go] Convert this into more reasonable logic. + for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' || + parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' || + parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' || + parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' || + parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' || + parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' || + parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' || + parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' || + parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' || + parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' || + parser.buffer[parser.buffer_pos] == '%' { + // Check if it is a URI-escape sequence. 
+ if parser.buffer[parser.buffer_pos] == '%' { + if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) { + return false + } + } else { + if !read(parser, &s) { + return false + } + } + if !cache(parser, 1) { + return false + } + } + + // Check if the tag is non-empty. + if len(s) == 0 { + yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "did not find expected tag URI") + return false + } + *uri = s + return true +} + +// Decode a URI-escape sequence corresponding to a single UTF-8 character. Each "%XX" octet is appended to *s; an error is set if an octet is malformed or is not a valid leading/trailing UTF-8 octet. +func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool { + + // Decode the required number of characters. w counts the octets still to read; 1024 is a sentinel meaning the leading octet has not been decoded yet. + w := 1024 + for w > 0 { + // Check for a URI-escaped octet. + if !cache(parser, 3) { + return false + } + + if !(parser.buffer[parser.buffer_pos] == '%' && + is_hex(parser.buffer, parser.buffer_pos+1) && + is_hex(parser.buffer, parser.buffer_pos+2)) { + return yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "did not find URI escaped octet") + } + + // Get the octet. + octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2)) + + // If it is the leading octet, determine the length of the UTF-8 sequence. + if w == 1024 { + w = width(octet) + if w == 0 { + return yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "found an incorrect leading UTF-8 octet") + } + } else { + // Check if the trailing octet is correct (it must have the form 10xxxxxx). + if octet&0xC0 != 0x80 { + return yaml_parser_set_scanner_tag_error(parser, directive, + start_mark, "found an incorrect trailing UTF-8 octet") + } + } + + // Copy the octet and move the pointers past the three characters "%XX". + *s = append(*s, octet) + skip(parser) + skip(parser) + skip(parser) + w-- + } + return true +} + +// Scan a block scalar. +func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool { + // Eat the indicator '|' or '>'.
+ start_mark := parser.mark + skip(parser) + + // Scan the additional block scalar indicators. + if !cache(parser, 1) { + return false + } + + // Check for a chomping indicator. chomping becomes +1 for '+', -1 for '-', and stays 0 when no indicator is given; increment holds the explicit indentation digit (0 if absent). + var chomping, increment int + if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' { + // Set the chomping method and eat the indicator. + if parser.buffer[parser.buffer_pos] == '+' { + chomping = +1 + } else { + chomping = -1 + } + skip(parser) + + // Check for an indentation indicator. + if !cache(parser, 1) { + return false + } + if is_digit(parser.buffer, parser.buffer_pos) { + // Check that the indentation is greater than 0. + if parser.buffer[parser.buffer_pos] == '0' { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found an intendation indicator equal to 0") + return false + } + + // Get the indentation level and eat the indicator. + increment = as_digit(parser.buffer, parser.buffer_pos) + skip(parser) + } + + } else if is_digit(parser.buffer, parser.buffer_pos) { + // Do the same as above, but in the opposite order. + + if parser.buffer[parser.buffer_pos] == '0' { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found an intendation indicator equal to 0") + return false + } + increment = as_digit(parser.buffer, parser.buffer_pos) + skip(parser) + + if !cache(parser, 1) { + return false + } + if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' { + if parser.buffer[parser.buffer_pos] == '+' { + chomping = +1 + } else { + chomping = -1 + } + skip(parser) + } + } + + // Eat whitespaces and comments to the end of the line.
+ if !cache(parser, 1) { + return false + } + for is_blank(parser.buffer, parser.buffer_pos) { + skip(parser) + if !cache(parser, 1) { + return false + } + } + if parser.buffer[parser.buffer_pos] == '#' { + for !is_breakz(parser.buffer, parser.buffer_pos) { + skip(parser) + if !cache(parser, 1) { + return false + } + } + } + + // Check if we are at the end of the line. + if !is_breakz(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "did not find expected comment or line break") + return false + } + + // Eat a line break. + if is_break(parser.buffer, parser.buffer_pos) { + if !cache(parser, 2) { + return false + } + skip_line(parser) + } + + end_mark := parser.mark + + // Set the indentation level if it was specified. + var indent int + if increment > 0 { + if parser.indent >= 0 { + indent = parser.indent + increment + } else { + indent = increment + } + } + + // Scan the leading line breaks and determine the indentation level if needed. + var s, leading_break, trailing_breaks []byte + if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) { + return false + } + + // Scan the block scalar content. + if !cache(parser, 1) { + return false + } + var leading_blank, trailing_blank bool + for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) { + // We are at the beginning of a non-empty line. + + // Is it a trailing whitespace? + trailing_blank = is_blank(parser.buffer, parser.buffer_pos) + + // Check if we need to fold the leading line break. [Go] leading_break is still empty on the first content line, so its length must be checked before indexing it. + if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' { + // Do we need to join the lines by space? + if len(trailing_breaks) == 0 { + s = append(s, ' ') + } + } else { + s = append(s, leading_break...) + } + leading_break = leading_break[:0] + + // Append the remaining line breaks. + s = append(s, trailing_breaks...)
+ trailing_breaks = trailing_breaks[:0] + + // Is it a leading whitespace? + leading_blank = is_blank(parser.buffer, parser.buffer_pos) + + // Consume the current line. + for !is_breakz(parser.buffer, parser.buffer_pos) { + if !read(parser, &s) { + return false + } + if !cache(parser, 1) { + return false + } + } + + // Consume the line break. + if !cache(parser, 2) { + return false + } + + if !read_line(parser, &leading_break) { + return false + } + + // Eat the following indentation spaces and line breaks. + if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) { + return false + } + } + + // Chomp the tail. + if chomping != -1 { + s = append(s, leading_break...) + } + if chomping == 1 { + s = append(s, trailing_breaks...) + } + + // Create a token. + *token = yaml_token_t{ + typ: yaml_SCALAR_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + token.scalar.value = s + if literal { + token.scalar.style = yaml_LITERAL_SCALAR_STYLE + } else { + token.scalar.style = yaml_FOLDED_SCALAR_STYLE + } + + return true +} + +// Scan indentation spaces and line breaks for a block scalar. Determine the +// indentation level if needed. Consumed line breaks are appended to *breaks and *end_mark is advanced past each of them. +func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool { + *end_mark = parser.mark + + // Eat the indentation spaces and line breaks. + max_indent := 0 + for { + // Eat the indentation spaces. The indent test is parenthesized because '&&' binds tighter than '||'; only space characters may be skipped here, even while the indentation level is still undetermined (*indent == 0). + if !cache(parser, 1) { + return false + } + for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) { + skip(parser) + if !cache(parser, 1) { + return false + } + } + if parser.mark.column > max_indent { + max_indent = parser.mark.column + } + + // Check for a tab character messing the indentation.
+ if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) { + return yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found a tab character where an intendation space is expected") + } + + // Have we found a non-empty line? + if !is_break(parser.buffer, parser.buffer_pos) { + break + } + + // Consume the line break. + if !cache(parser, 2) { + return false + } + if !read_line(parser, breaks) { + return false + } + *end_mark = parser.mark + } + + // Determine the indentation level if needed. + if *indent == 0 { + *indent = max_indent + if *indent < parser.indent+1 { + *indent = parser.indent + 1 + } + if *indent < 1 { + *indent = 1 + } + } + return true +} + +// Scan a quoted scalar. +func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool { + // Eat the left quote. + start_mark := parser.mark + skip(parser) + + // Consume the content of the quoted scalar. + var s, leading_break, trailing_breaks, whitespaces []byte + for { + // Check that there are no document indicators at the beginning of the line. + if !cache(parser, 4) { + return false + } + + if parser.mark.column == 0 && + ((parser.buffer[parser.buffer_pos+0] == '-' && + parser.buffer[parser.buffer_pos+1] == '-' && + parser.buffer[parser.buffer_pos+2] == '-') || + (parser.buffer[parser.buffer_pos+0] == '.' && + parser.buffer[parser.buffer_pos+1] == '.' && + parser.buffer[parser.buffer_pos+2] == '.')) && + is_blankz(parser.buffer, parser.buffer_pos+3) { + yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", + start_mark, "found unexpected document indicator") + return false + } + + // Check for EOF. + if is_z(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", + start_mark, "found unexpected end of stream") + return false + } + + // Consume non-blank characters. 
+ if !cache(parser, 2) { + return false + } + leading_blanks := false + for !is_blankz(parser.buffer, parser.buffer_pos) { + if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' { + // Is is an escaped single quote. + s = append(s, '\'') + skip(parser) + skip(parser) + + } else if single && parser.buffer[parser.buffer_pos] == '\'' { + // It is a right single quote. + break + } else if !single && parser.buffer[parser.buffer_pos] == '"' { + // It is a right double quote. + break + + } else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) { + // It is an escaped line break. + if !cache(parser, 3) { + return false + } + skip(parser) + skip_line(parser) + leading_blanks = true + break + + } else if !single && parser.buffer[parser.buffer_pos] == '\\' { + // It is an escape sequence. + code_length := 0 + + // Check the escape character. + switch parser.buffer[parser.buffer_pos+1] { + case '0': + s = append(s, 0) + case 'a': + s = append(s, '\x07') + case 'b': + s = append(s, '\x08') + case 't', '\t': + s = append(s, '\x09') + case 'n': + s = append(s, '\x0A') + case 'v': + s = append(s, '\x0B') + case 'f': + s = append(s, '\x0C') + case 'r': + s = append(s, '\x0D') + case 'e': + s = append(s, '\x1B') + case ' ': + s = append(s, '\x20') + case '"': + s = append(s, '"') + case '\'': + s = append(s, '\'') + case '\\': + s = append(s, '\\') + case 'N': // NEL (#x85) + s = append(s, '\xC2') + s = append(s, '\x85') + case '_': // #xA0 + s = append(s, '\xC2') + s = append(s, '\xA0') + case 'L': // LS (#x2028) + s = append(s, '\xE2') + s = append(s, '\x80') + s = append(s, '\xA8') + case 'P': // PS (#x2029) + s = append(s, '\xE2') + s = append(s, '\x80') + s = append(s, '\xA9') + case 'x': + code_length = 2 + case 'u': + code_length = 4 + case 'U': + code_length = 8 + default: + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "found unknown 
escape character") + return false + } + + skip(parser) + skip(parser) + + // Consume an arbitrary escape code. + if code_length > 0 { + var value int + + // Scan the character value. + if !cache(parser, code_length) { + return false + } + for k := 0; k < code_length; k++ { + if !is_hex(parser.buffer, parser.buffer_pos+k) { + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "did not find expected hexdecimal number") + return false + } + value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k) + } + + // Check the value and write the character. + if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF { + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "found invalid Unicode character escape code") + return false + } + if value <= 0x7F { + s = append(s, byte(value)) + } else if value <= 0x7FF { + s = append(s, byte(0xC0+(value>>6))) + s = append(s, byte(0x80+(value&0x3F))) + } else if value <= 0xFFFF { + s = append(s, byte(0xE0+(value>>12))) + s = append(s, byte(0x80+((value>>6)&0x3F))) + s = append(s, byte(0x80+(value&0x3F))) + } else { + s = append(s, byte(0xF0+(value>>18))) + s = append(s, byte(0x80+((value>>12)&0x3F))) + s = append(s, byte(0x80+((value>>6)&0x3F))) + s = append(s, byte(0x80+(value&0x3F))) + } + + // Advance the pointer. + for k := 0; k < code_length; k++ { + skip(parser) + } + } + } else { + // It is a non-escaped non-blank character. + if !read(parser, &s) { + return false + } + } + if !cache(parser, 2) { + return false + } + } + + // Check if we are at the end of the scalar. + if single { + if parser.buffer[parser.buffer_pos] == '\'' { + break + } + } else { + if parser.buffer[parser.buffer_pos] == '"' { + break + } + } + + // Consume blank characters. 
+ if !cache(parser, 1) { + return false + } + + for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { + if is_blank(parser.buffer, parser.buffer_pos) { + // Consume a space or a tab character. + if !leading_blanks { + if !read(parser, &whitespaces) { + return false + } + } else { + skip(parser) + } + } else { + if !cache(parser, 2) { + return false + } + + // Check if it is a first line break. + if !leading_blanks { + whitespaces = whitespaces[:0] + if !read_line(parser, &leading_break) { + return false + } + leading_blanks = true + } else { + if !read_line(parser, &trailing_breaks) { + return false + } + } + } + if !cache(parser, 1) { + return false + } + } + + // Join the whitespaces or fold line breaks. + if leading_blanks { + // Do we need to fold line breaks? [Go] leading_break is empty when the first break was an escaped one (it is skipped, not recorded), so its length must be checked before indexing it. + if len(leading_break) > 0 && leading_break[0] == '\n' { + if len(trailing_breaks) == 0 { + s = append(s, ' ') + } else { + s = append(s, trailing_breaks...) + } + } else { + s = append(s, leading_break...) + s = append(s, trailing_breaks...) + } + trailing_breaks = trailing_breaks[:0] + leading_break = leading_break[:0] + } else { + s = append(s, whitespaces...) + whitespaces = whitespaces[:0] + } + } + + // Eat the right quote. + skip(parser) + end_mark := parser.mark + + // Create a token. + *token = yaml_token_t{ + typ: yaml_SCALAR_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + token.scalar.value = s + if single { + token.scalar.style = yaml_SINGLE_QUOTED_SCALAR_STYLE + } else { + token.scalar.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } + return true +} + +// Scan a plain scalar. +func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool { + var s, leading_break, trailing_breaks, whitespaces []byte + var leading_blanks bool + var indent = parser.indent + 1 + + start_mark := parser.mark + end_mark := parser.mark + + // Consume the content of the plain scalar. + for { + // Check for a document indicator.
+ if !cache(parser, 4) { + return false + } + if parser.mark.column == 0 && + ((parser.buffer[parser.buffer_pos+0] == '-' && + parser.buffer[parser.buffer_pos+1] == '-' && + parser.buffer[parser.buffer_pos+2] == '-') || + (parser.buffer[parser.buffer_pos+0] == '.' && + parser.buffer[parser.buffer_pos+1] == '.' && + parser.buffer[parser.buffer_pos+2] == '.')) && + is_blankz(parser.buffer, parser.buffer_pos+3) { + break + } + + // Check for a comment. + if parser.buffer[parser.buffer_pos] == '#' { + break + } + + // Consume non-blank characters. + for !is_blankz(parser.buffer, parser.buffer_pos) { + + // Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". + if parser.flow_level > 0 && + parser.buffer[parser.buffer_pos] == ':' && + !is_blankz(parser.buffer, parser.buffer_pos+1) { + yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", + start_mark, "found unexpected ':'") + return false + } + + // Check for indicators that may end a plain scalar. + if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) || + (parser.flow_level > 0 && + (parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == ':' || + parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' || + parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || + parser.buffer[parser.buffer_pos] == '}')) { + break + } + + // Check if we need to join whitespaces and breaks. + if leading_blanks || len(whitespaces) > 0 { + if leading_blanks { + // Do we need to fold line breaks? + if leading_break[0] == '\n' { + if len(trailing_breaks) == 0 { + s = append(s, ' ') + } else { + s = append(s, trailing_breaks...) + } + } else { + s = append(s, leading_break...) + s = append(s, trailing_breaks...) + } + trailing_breaks = trailing_breaks[:0] + leading_break = leading_break[:0] + leading_blanks = false + } else { + s = append(s, whitespaces...) 
+ whitespaces = whitespaces[:0] + } + } + + // Copy the character. + if !read(parser, &s) { + return false + } + + end_mark = parser.mark + if !cache(parser, 2) { + return false + } + } + + // Is it the end? + if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) { + break + } + + // Consume blank characters. + if !cache(parser, 1) { + return false + } + + for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { + if is_blank(parser.buffer, parser.buffer_pos) { + + // Check for a tab character that abuses indentation. + if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) { + yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", + start_mark, "found a tab character that violate intendation") + return false + } + + // Consume a space or a tab character. + if !leading_blanks { + if !read(parser, &whitespaces) { + return false + } + } else { + skip(parser) + } + } else { + if !cache(parser, 2) { + return false + } + + // Check if it is a first line break. + if !leading_blanks { + whitespaces = whitespaces[:0] + if !read_line(parser, &leading_break) { + return false + } + leading_blanks = true + } else { + if !read_line(parser, &trailing_breaks) { + return false + } + } + } + if !cache(parser, 1) { + return false + } + } + + // Check the indentation level. + if parser.flow_level == 0 && parser.mark.column < indent { + break + } + } + + // Create a token. + *token = yaml_token_t{ + typ: yaml_SCALAR_TOKEN, + start_mark: start_mark, + end_mark: end_mark, + } + token.scalar.value = s + token.scalar.style = yaml_PLAIN_SCALAR_STYLE + + // Note that we change the 'simple_key_allowed' flag: leading_blanks is only set after a line break has been consumed, and in that case a simple key is allowed again. + if leading_blanks { + parser.simple_key_allowed = true + } + return true +}
diff --git a/yaml_h.go b/yaml_h.go new file mode 100644 index 0000000..ad23067 --- /dev/null +++ b/yaml_h.go
@@ -0,0 +1,1513 @@ +package goyaml + +import ( + "io" +) + +// The version directive data. +type yaml_version_directive_t struct { + major int // The major version number. + minor int // The minor version number. +} + +// The tag directive data. +type yaml_tag_directive_t struct { + handle []byte // The tag handle. + prefix []byte // The tag prefix. +} + +type yaml_encoding_t int + +// The stream encoding. +const ( + // Let the parser choose the encoding. + yaml_ANY_ENCODING yaml_encoding_t = iota + + yaml_UTF8_ENCODING // The default UTF-8 encoding. + yaml_UTF16LE_ENCODING // The UTF-16-LE encoding with BOM. + yaml_UTF16BE_ENCODING // The UTF-16-BE encoding with BOM. +) + +type yaml_break_t int + +// Line break types. +const ( + // Let the parser choose the break type. + yaml_ANY_BREAK yaml_break_t = iota + + yaml_CR_BREAK // Use CR for line breaks (Mac style). + yaml_LN_BREAK // Use LN for line breaks (Unix style). + yaml_CRLN_BREAK // Use CR LN for line breaks (DOS style). +) + +type yaml_error_type_t int + +// Many bad things could happen with the parser and emitter. +const ( + // No error is produced. + yaml_NO_ERROR yaml_error_type_t = iota + + yaml_MEMORY_ERROR // Cannot allocate or reallocate a block of memory. + yaml_READER_ERROR // Cannot read or decode the input stream. + yaml_SCANNER_ERROR // Cannot scan the input stream. + yaml_PARSER_ERROR // Cannot parse the input stream. + yaml_COMPOSER_ERROR // Cannot compose a YAML document. + yaml_WRITER_ERROR // Cannot write to the output stream. + yaml_EMITTER_ERROR // Cannot emit a YAML stream. +) + +// The pointer position. +type yaml_mark_t struct { + index int // The position index. + line int // The position line. + column int // The position column. +} + +// Node Styles + +type yaml_scalar_style_t int + +// Scalar styles. +const ( + // Let the emitter choose the style. + yaml_ANY_SCALAR_STYLE yaml_scalar_style_t = iota + + yaml_PLAIN_SCALAR_STYLE // The plain scalar style. 
+ yaml_SINGLE_QUOTED_SCALAR_STYLE // The single-quoted scalar style. + yaml_DOUBLE_QUOTED_SCALAR_STYLE // The double-quoted scalar style. + yaml_LITERAL_SCALAR_STYLE // The literal scalar style. + yaml_FOLDED_SCALAR_STYLE // The folded scalar style. +) + +type yaml_sequence_style_t int + +// Sequence styles. +const ( + // Let the emitter choose the style. + yaml_ANY_SEQUENCE_STYLE yaml_sequence_style_t = iota + + yaml_BLOCK_SEQUENCE_STYLE // The block sequence style. + yaml_FLOW_SEQUENCE_STYLE // The flow sequence style. +) + +type yaml_mapping_style_t int + +// Mapping styles. +const ( + // Let the emitter choose the style. NOTE(review): this is the only constant here spelled with an upper-case YAML_ prefix (which also makes it exported); the sibling constants use yaml_ - confirm whether the difference is intended before renaming. + YAML_ANY_MAPPING_STYLE yaml_mapping_style_t = iota + + yaml_BLOCK_MAPPING_STYLE // The block mapping style. + yaml_FLOW_MAPPING_STYLE // The flow mapping style. +) + +// Tokens + +type yaml_token_type_t int + +// Token types. +const ( + // An empty token. + yaml_NO_TOKEN yaml_token_type_t = iota + + yaml_STREAM_START_TOKEN // A STREAM-START token. + yaml_STREAM_END_TOKEN // A STREAM-END token. + + yaml_VERSION_DIRECTIVE_TOKEN // A VERSION-DIRECTIVE token. + yaml_TAG_DIRECTIVE_TOKEN // A TAG-DIRECTIVE token. + yaml_DOCUMENT_START_TOKEN // A DOCUMENT-START token. + yaml_DOCUMENT_END_TOKEN // A DOCUMENT-END token. + + yaml_BLOCK_SEQUENCE_START_TOKEN // A BLOCK-SEQUENCE-START token. + yaml_BLOCK_MAPPING_START_TOKEN // A BLOCK-MAPPING-START token. + yaml_BLOCK_END_TOKEN // A BLOCK-END token. + + yaml_FLOW_SEQUENCE_START_TOKEN // A FLOW-SEQUENCE-START token. + yaml_FLOW_SEQUENCE_END_TOKEN // A FLOW-SEQUENCE-END token. + yaml_FLOW_MAPPING_START_TOKEN // A FLOW-MAPPING-START token. + yaml_FLOW_MAPPING_END_TOKEN // A FLOW-MAPPING-END token. + + yaml_BLOCK_ENTRY_TOKEN // A BLOCK-ENTRY token. + yaml_FLOW_ENTRY_TOKEN // A FLOW-ENTRY token. + yaml_KEY_TOKEN // A KEY token. + yaml_VALUE_TOKEN // A VALUE token. + + yaml_ALIAS_TOKEN // An ALIAS token. + yaml_ANCHOR_TOKEN // An ANCHOR token. + yaml_TAG_TOKEN // A TAG token.
+ yaml_SCALAR_TOKEN // A SCALAR token. +) + +// The token structure. +type yaml_token_t struct { + + // The token type. + typ yaml_token_type_t + + // The token data. + + // The stream start (for yaml_STREAM_START_TOKEN). + stream_start struct { + encoding yaml_encoding_t // The stream encoding. + } + + // The alias (for yaml_ALIAS_TOKEN). + alias struct { + value []byte // The alias value. + } + + // The anchor (for yaml_ANCHOR_TOKEN). + anchor struct { + value []byte // The anchor value. + } + + // The tag (for yaml_TAG_TOKEN). + tag struct { + handle []byte // The tag handle. + suffix []byte // The tag suffix. + } + + // The scalar value (for yaml_SCALAR_TOKEN). + scalar struct { + value []byte // The scalar value. + style yaml_scalar_style_t // The scalar style. + } + + // The version directive (for yaml_VERSION_DIRECTIVE_TOKEN). + version_directive struct { + major int // The major version number. + minor int // The minor version number. + } + + // The tag directive (for yaml_TAG_DIRECTIVE_TOKEN). + tag_directive struct { + handle []byte // The tag handle. + prefix []byte // The tag prefix. + } + + // The beginning of the token. + start_mark yaml_mark_t + // The end of the token. + end_mark yaml_mark_t +} + +// Events + +type yaml_event_type_t int + +// Event types. +const ( + // An empty event. + yaml_NO_EVENT yaml_event_type_t = iota + + yaml_STREAM_START_EVENT // A STREAM-START event. + yaml_STREAM_END_EVENT // A STREAM-END event. + yaml_DOCUMENT_START_EVENT // A DOCUMENT-START event. + yaml_DOCUMENT_END_EVENT // A DOCUMENT-END event. + yaml_ALIAS_EVENT // An ALIAS event. + yaml_SCALAR_EVENT // A SCALAR event. + yaml_SEQUENCE_START_EVENT // A SEQUENCE-START event. + yaml_SEQUENCE_END_EVENT // A SEQUENCE-END event. + yaml_MAPPING_START_EVENT // A MAPPING-START event. + yaml_MAPPING_END_EVENT // A MAPPING-END event. +) + +// The event structure. +type yaml_event_t struct { + + // The event type. + typ yaml_event_type_t + + // The event data. 
+ + // The stream parameters (for yaml_STREAM_START_EVENT). + stream_start struct { + encoding yaml_encoding_t // The document encoding. + } + + // The document parameters (for yaml_DOCUMENT_START_EVENT). + document_start struct { + version_directive *yaml_version_directive_t // The version directive. + + // The list of tag directives. + tag_directives []yaml_tag_directive_t + implicit bool // Is the document indicator implicit? + } + + // The document end parameters (for yaml_DOCUMENT_END_EVENT). + document_end struct { + implicit bool // Is the document end indicator implicit? + } + + // The alias parameters (for yaml_ALIAS_EVENT). + alias struct { + anchor []byte // The anchor. + } + + // The scalar parameters (for yaml_SCALAR_EVENT). + scalar struct { + anchor []byte // The anchor. + tag []byte // The tag. + value []byte // The scalar value. + length int // The length of the scalar value. + plain_implicit bool // Is the tag optional for the plain style? + quoted_implicit bool // Is the tag optional for any non-plain style? + style yaml_scalar_style_t // The scalar style. + } + + // The sequence parameters (for yaml_SEQUENCE_START_EVENT). + sequence_start struct { + anchor []byte // The anchor. + tag []byte // The tag. + implicit bool // Is the tag optional? + style yaml_sequence_style_t // The sequence style. + } + + // The mapping parameters (for yaml_MAPPING_START_EVENT). + mapping_start struct { + anchor []byte // The anchor. + tag []byte // The tag. + implicit bool // Is the tag optional? + style yaml_mapping_style_t // The mapping style. + } + + start_mark yaml_mark_t // The beginning of the event. + end_mark yaml_mark_t // The end of the event. + +} + +///** +// * Create the STREAM-START event. +// * +// * @param[out] event An empty event object. +// * @param[in] encoding The stream encoding. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. 
+// */ +// +//YAML_DECLARE(int) +//yaml_stream_start_event_initialize(yaml_event_t *event, +// yaml_encoding_t encoding); +// +///** +// * Create the STREAM-END event. +// * +// * @param[out] event An empty event object. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_stream_end_event_initialize(yaml_event_t *event); +// +///** +// * Create the DOCUMENT-START event. +// * +// * The @a implicit argument is considered as a stylistic parameter and may be +// * ignored by the emitter. +// * +// * @param[out] event An empty event object. +// * @param[in] version_directive The %YAML directive value or +// * @c NULL. +// * @param[in] tag_directives_start The beginning of the %TAG +// * directives list. +// * @param[in] tag_directives_end The end of the %TAG directives +// * list. +// * @param[in] implicit If the document start indicator is +// * implicit. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_document_start_event_initialize(yaml_event_t *event, +// yaml_version_directive_t *version_directive, +// yaml_tag_directive_t *tag_directives_start, +// yaml_tag_directive_t *tag_directives_end, +// int implicit); +// +///** +// * Create the DOCUMENT-END event. +// * +// * The @a implicit argument is considered as a stylistic parameter and may be +// * ignored by the emitter. +// * +// * @param[out] event An empty event object. +// * @param[in] implicit If the document end indicator is implicit. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_document_end_event_initialize(yaml_event_t *event, int implicit); +// +///** +// * Create an ALIAS event. +// * +// * @param[out] event An empty event object. +// * @param[in] anchor The anchor value. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. 
+// */ +// +//YAML_DECLARE(int) +//yaml_alias_event_initialize(yaml_event_t *event, yaml_char_t *anchor); +// +///** +// * Create a SCALAR event. +// * +// * The @a style argument may be ignored by the emitter. +// * +// * Either the @a tag attribute or one of the @a plain_implicit and +// * @a quoted_implicit flags must be set. +// * +// * @param[out] event An empty event object. +// * @param[in] anchor The scalar anchor or @c NULL. +// * @param[in] tag The scalar tag or @c NULL. +// * @param[in] value The scalar value. +// * @param[in] length The length of the scalar value. +// * @param[in] plain_implicit If the tag may be omitted for the plain +// * style. +// * @param[in] quoted_implicit If the tag may be omitted for any +// * non-plain style. +// * @param[in] style The scalar style. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_scalar_event_initialize(yaml_event_t *event, +// yaml_char_t *anchor, yaml_char_t *tag, +// yaml_char_t *value, int length, +// int plain_implicit, int quoted_implicit, +// yaml_scalar_style_t style); +// +///** +// * Create a SEQUENCE-START event. +// * +// * The @a style argument may be ignored by the emitter. +// * +// * Either the @a tag attribute or the @a implicit flag must be set. +// * +// * @param[out] event An empty event object. +// * @param[in] anchor The sequence anchor or @c NULL. +// * @param[in] tag The sequence tag or @c NULL. +// * @param[in] implicit If the tag may be omitted. +// * @param[in] style The sequence style. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_sequence_start_event_initialize(yaml_event_t *event, +// yaml_char_t *anchor, yaml_char_t *tag, int implicit, +// yaml_sequence_style_t style); +// +///** +// * Create a SEQUENCE-END event. +// * +// * @param[out] event An empty event object. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. 
// */
//
//YAML_DECLARE(int)
//yaml_sequence_end_event_initialize(yaml_event_t *event);
//
///**
// * Create a MAPPING-START event.
// *
// * The @a style argument may be ignored by the emitter.
// *
// * Either the @a tag attribute or the @a implicit flag must be set.
// *
// * @param[out] event An empty event object.
// * @param[in] anchor The mapping anchor or @c NULL.
// * @param[in] tag The mapping tag or @c NULL.
// * @param[in] implicit If the tag may be omitted.
// * @param[in] style The mapping style.
// *
// * @returns @c 1 if the function succeeded, @c 0 on error.
// */
//
//YAML_DECLARE(int)
//yaml_mapping_start_event_initialize(yaml_event_t *event,
//        yaml_char_t *anchor, yaml_char_t *tag, int implicit,
//        yaml_mapping_style_t style);
//
///**
// * Create a MAPPING-END event.
// *
// * @param[out] event An empty event object.
// *
// * @returns @c 1 if the function succeeded, @c 0 on error.
// */
//
//YAML_DECLARE(int)
//yaml_mapping_end_event_initialize(yaml_event_t *event);
//
///**
// * Free any memory allocated for an event object.
// *
// * @param[in,out] event An event object.
// */
//
//YAML_DECLARE(void)
//yaml_event_delete(yaml_event_t *event);
//
//// @}
//

// Nodes

// Well-known YAML tag URIs, plus the default tags for scalar, sequence and
// mapping nodes.
const (
	yaml_NULL_TAG      = "tag:yaml.org,2002:null"      // The tag !!null with the only possible value: null.
	yaml_BOOL_TAG      = "tag:yaml.org,2002:bool"      // The tag !!bool with the values: true and false.
	yaml_STR_TAG       = "tag:yaml.org,2002:str"       // The tag !!str for string values.
	yaml_INT_TAG       = "tag:yaml.org,2002:int"       // The tag !!int for integer values.
	yaml_FLOAT_TAG     = "tag:yaml.org,2002:float"     // The tag !!float for float values.
	yaml_TIMESTAMP_TAG = "tag:yaml.org,2002:timestamp" // The tag !!timestamp for date and time values.

	yaml_SEQ_TAG = "tag:yaml.org,2002:seq" // The tag !!seq is used to denote sequences.
	yaml_MAP_TAG = "tag:yaml.org,2002:map" // The tag !!map is used to denote mapping.

	yaml_DEFAULT_SCALAR_TAG   = yaml_STR_TAG // The default scalar tag is !!str.
	yaml_DEFAULT_SEQUENCE_TAG = yaml_SEQ_TAG // The default sequence tag is !!seq.
	yaml_DEFAULT_MAPPING_TAG  = yaml_MAP_TAG // The default mapping tag is !!map.
)

// yaml_node_type_t identifies the kind of a node; it is the type of the
// yaml_*_NODE constants below and of yaml_node_t.typ.
type yaml_node_type_t int

// Node types.
const (
	// An empty node. This is the zero value of yaml_node_type_t.
	yaml_NO_NODE yaml_node_type_t = iota

	yaml_SCALAR_NODE   // A scalar node.
	yaml_SEQUENCE_NODE // A sequence node.
	yaml_MAPPING_NODE  // A mapping node.
)

// An element of a sequence node.
// NOTE(review): presumably the id of the item node within the document
// (the C API calls it "the item node id") - confirm against the users.
type yaml_node_item_t int

// An element of a mapping node.
// NOTE(review): key and value are presumably node ids, as in the C API
// yaml_document_append_mapping_pair - confirm against the users.
type yaml_node_pair_t struct {
	key   int // The key of the element.
	value int // The value of the element.
}

// The node structure.
//
// Only the parameter group matching typ (scalar, sequence or mapping) is
// described as meaningful for a given node.
type yaml_node_t struct {
	typ yaml_node_type_t // The node type.
	tag []byte           // The node tag.

	// The node data.

	// The scalar parameters (for yaml_SCALAR_NODE).
	scalar struct {
		value  []byte              // The scalar value.
		length int                 // The length of the scalar value.
		style  yaml_scalar_style_t // The scalar style.
	}

	// The sequence parameters (for yaml_SEQUENCE_NODE).
	sequence struct {
		items_data []yaml_node_item_t    // The stack of sequence items.
		style      yaml_sequence_style_t // The sequence style.
	}

	// The mapping parameters (for yaml_MAPPING_NODE).
	mapping struct {
		pairs_data  []yaml_node_pair_t   // The stack of mapping pairs (key, value).
		pairs_start *yaml_node_pair_t    // The beginning of the stack.
		pairs_end   *yaml_node_pair_t    // The end of the stack.
		pairs_top   *yaml_node_pair_t    // The top of the stack.
		style       yaml_mapping_style_t // The mapping style.
	}

	start_mark yaml_mark_t // The beginning of the node.
	end_mark   yaml_mark_t // The end of the node.

}

// The document structure.
type yaml_document_t struct {

	// The document nodes.
	nodes []yaml_node_t

	// The version directive.
	version_directive *yaml_version_directive_t

	// The list of tag directives.
	tag_directives_data  []yaml_tag_directive_t
	tag_directives_start int // The beginning of the tag directives list.
	tag_directives_end   int // The end of the tag directives list.

	start_implicit int // Is the document start indicator implicit?
	end_implicit   int // Is the document end indicator implicit?

	start_mark yaml_mark_t // The beginning of the document.
	end_mark   yaml_mark_t // The end of the document.

}

///**
// * Create a YAML document.
// *
// * @param[out] document An empty document object.
// * @param[in] version_directive The %YAML directive value or
// * @c NULL.
// * @param[in] tag_directives_start The beginning of the %TAG
// * directives list.
// * @param[in] tag_directives_end The end of the %TAG directives
// * list.
// * @param[in] start_implicit If the document start indicator is
// * implicit.
// * @param[in] end_implicit If the document end indicator is
// * implicit.
// *
// * @returns @c 1 if the function succeeded, @c 0 on error.
// */
//
//YAML_DECLARE(int)
//yaml_document_initialize(yaml_document_t *document,
//        yaml_version_directive_t *version_directive,
//        yaml_tag_directive_t *tag_directives_start,
//        yaml_tag_directive_t *tag_directives_end,
//        int start_implicit, int end_implicit);
//
///**
// * Delete a YAML document and all its nodes.
// *
// * @param[in,out] document A document object.
// */
//
//YAML_DECLARE(void)
//yaml_document_delete(yaml_document_t *document);
//
///**
// * Get a node of a YAML document.
// *
// * The pointer returned by this function is valid until any of the functions
// * modifying the documents are called.
// *
// * @param[in] document A document object.
// * @param[in] index The node id.
// *
// * @returns the node object or @c NULL if @c node_id is out of range.
// */
//
//YAML_DECLARE(yaml_node_t *)
//yaml_document_get_node(yaml_document_t *document, int index);
//
///**
// * Get the root of a YAML document node.
+// * +// * The root object is the first object added to the document. +// * +// * The pointer returned by this function is valid until any of the functions +// * modifying the documents are called. +// * +// * An empty document produced by the parser signifies the end of a YAML +// * stream. +// * +// * @param[in] document A document object. +// * +// * @returns the node object or @c NULL if the document is empty. +// */ +// +//YAML_DECLARE(yaml_node_t *) +//yaml_document_get_root_node(yaml_document_t *document); +// +///** +// * Create a SCALAR node and attach it to the document. +// * +// * The @a style argument may be ignored by the emitter. +// * +// * @param[in,out] document A document object. +// * @param[in] tag The scalar tag. +// * @param[in] value The scalar value. +// * @param[in] length The length of the scalar value. +// * @param[in] style The scalar style. +// * +// * @returns the node id or @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_document_add_scalar(yaml_document_t *document, +// yaml_char_t *tag, yaml_char_t *value, int length, +// yaml_scalar_style_t style); +// +///** +// * Create a SEQUENCE node and attach it to the document. +// * +// * The @a style argument may be ignored by the emitter. +// * +// * @param[in,out] document A document object. +// * @param[in] tag The sequence tag. +// * @param[in] style The sequence style. +// * +// * @returns the node id or @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_document_add_sequence(yaml_document_t *document, +// yaml_char_t *tag, yaml_sequence_style_t style); +// +///** +// * Create a MAPPING node and attach it to the document. +// * +// * The @a style argument may be ignored by the emitter. +// * +// * @param[in,out] document A document object. +// * @param[in] tag The sequence tag. +// * @param[in] style The sequence style. +// * +// * @returns the node id or @c 0 on error. 
// */
//
//YAML_DECLARE(int)
//yaml_document_add_mapping(yaml_document_t *document,
//        yaml_char_t *tag, yaml_mapping_style_t style);
//
///**
// * Add an item to a SEQUENCE node.
// *
// * @param[in,out] document A document object.
// * @param[in] sequence The sequence node id.
// * @param[in] item The item node id.
//*
// * @returns @c 1 if the function succeeded, @c 0 on error.
// */
//
//YAML_DECLARE(int)
//yaml_document_append_sequence_item(yaml_document_t *document,
//        int sequence, int item);
//
///**
// * Add a pair of a key and a value to a MAPPING node.
// *
// * @param[in,out] document A document object.
// * @param[in] mapping The mapping node id.
// * @param[in] key The key node id.
// * @param[in] value The value node id.
//*
// * @returns @c 1 if the function succeeded, @c 0 on error.
// */
//
//YAML_DECLARE(int)
//yaml_document_append_mapping_pair(yaml_document_t *document,
//        int mapping, int key, int value);
//

// Parser Definitions

// The prototype of a read handler.
//
// The read handler is called when the parser needs to read more bytes from the
// source. The handler should write no more than len(buffer) bytes to the
// buffer and report the number of bytes written through n.
//
// parser   The parser the handler is installed on. The handlers in this
//          package read their source from its fields (input/input_pos or
//          input_file).
// buffer   The buffer to write the data from the source.
// n        The actual number of bytes read from the source.
// err      A non-nil error if reading failed. The string handler in this
//          package signals the end of the input by returning 0, io.EOF.
//
// NOTE(review): the libyaml text this was ported from described a 1/0 return
// value and a size_read out-parameter; confirm how the reader treats a
// handler that returns n > 0 together with a non-nil err.
type yaml_read_handler_t func(parser *yaml_parser_t, buffer []byte) (n int, err error)

// This structure holds information about a potential simple key.
type yaml_simple_key_t struct {
	possible     bool        // Is a simple key possible?
	required     bool        // Is a simple key required?
	token_number int         // The number of the token.
	mark         yaml_mark_t // The position mark.
}

// The states of the parser.
type yaml_parser_state_t int

// The parser states. The values are assigned by iota, so the order of the
// constants below must not change.
const (
	yaml_PARSE_STREAM_START_STATE yaml_parser_state_t = iota

	yaml_PARSE_IMPLICIT_DOCUMENT_START_STATE           // Expect the beginning of an implicit document.
	yaml_PARSE_DOCUMENT_START_STATE                    // Expect DOCUMENT-START.
	yaml_PARSE_DOCUMENT_CONTENT_STATE                  // Expect the content of a document.
	yaml_PARSE_DOCUMENT_END_STATE                      // Expect DOCUMENT-END.
	yaml_PARSE_BLOCK_NODE_STATE                        // Expect a block node.
	yaml_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE // Expect a block node or indentless sequence.
	yaml_PARSE_FLOW_NODE_STATE                         // Expect a flow node.
	yaml_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE        // Expect the first entry of a block sequence.
	yaml_PARSE_BLOCK_SEQUENCE_ENTRY_STATE              // Expect an entry of a block sequence.
	yaml_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE         // Expect an entry of an indentless sequence.
	yaml_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE           // Expect the first key of a block mapping.
	yaml_PARSE_BLOCK_MAPPING_KEY_STATE                 // Expect a block mapping key.
	yaml_PARSE_BLOCK_MAPPING_VALUE_STATE               // Expect a block mapping value.
	yaml_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE         // Expect the first entry of a flow sequence.
	yaml_PARSE_FLOW_SEQUENCE_ENTRY_STATE               // Expect an entry of a flow sequence.
	yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE   // Expect a key of an ordered mapping.
	yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE // Expect a value of an ordered mapping.
	yaml_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE   // Expect the end of an ordered mapping entry.
	yaml_PARSE_FLOW_MAPPING_FIRST_KEY_STATE            // Expect the first key of a flow mapping.
	yaml_PARSE_FLOW_MAPPING_KEY_STATE                  // Expect a key of a flow mapping.
	yaml_PARSE_FLOW_MAPPING_VALUE_STATE                // Expect a value of a flow mapping.
	yaml_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE          // Expect an empty value of a flow mapping.
	yaml_PARSE_END_STATE                               // Expect nothing.
)

// This structure holds aliases data.
type yaml_alias_data_t struct {
	anchor []byte      // The anchor.
	index  int         // The node id.
	mark   yaml_mark_t // The anchor mark.
}

// The parser structure.
//
// All members are internal. Manage the structure using the
// yaml_parser_ family of functions.
type yaml_parser_t struct {

	// Error handling

	error yaml_error_type_t // Error type.

	problem string // Error description.

	// The byte about which the problem occurred.
	problem_offset int
	problem_value  int
	problem_mark   yaml_mark_t

	// The error context.
	context      string
	context_mark yaml_mark_t

	// Reader stuff

	read_handler yaml_read_handler_t // Read handler.

	input_file io.Reader // File input data.
	input      []byte    // String input data.
	input_pos  int       // The read position within input (advanced by the string read handler).

	eof bool // EOF flag

	buffer     []byte // The working buffer.
	buffer_pos int    // The current position of the buffer.

	unread int // The number of unread characters in the buffer.

	raw_buffer     []byte // The raw buffer.
	raw_buffer_pos int    // The current position of the buffer.

	encoding yaml_encoding_t // The input encoding.

	offset int         // The offset of the current position (in bytes).
	mark   yaml_mark_t // The mark of the current position.

	// Scanner stuff

	stream_start_produced bool // Have we started to scan the input stream?
	stream_end_produced   bool // Have we reached the end of the input stream?

	flow_level int // The number of unclosed '[' and '{' indicators.

	tokens          []yaml_token_t // The tokens queue.
	tokens_head     int            // The head of the tokens queue.
	tokens_parsed   int            // The number of tokens fetched from the queue.
	token_available bool           // Does the tokens queue contain a token ready for dequeueing.

	indent  int   // The current indentation level.
	indents []int // The indentation levels stack.

	simple_key_allowed bool                // May a simple key occur at the current position?
	simple_keys        []yaml_simple_key_t // The stack of simple keys.

	// Parser stuff

	state          yaml_parser_state_t    // The current parser state.
	states         []yaml_parser_state_t  // The parser states stack.
	marks          []yaml_mark_t          // The stack of marks.
	tag_directives []yaml_tag_directive_t // The list of TAG directives.

	// Dumper stuff

	aliases []yaml_alias_data_t // The alias data.

	document *yaml_document_t // The currently parsed document.
}

//
///**
// * Initialize a parser.
// *
// * This function creates a new parser object. An application is responsible
// * for destroying the object using the yaml_parser_delete() function.
// *
// * @param[out] parser An empty parser object.
// *
// * @returns @c 1 if the function succeeded, @c 0 on error.
// */
//
//YAML_DECLARE(int)
//yaml_parser_initialize(yaml_parser_t *parser);
//
///**
// * Destroy a parser.
// *
// * @param[in,out] parser A parser object.
// */
//
//YAML_DECLARE(void)
//yaml_parser_delete(yaml_parser_t *parser);
//
///**
// * Set a string input.
// *
// * Note that the @a input pointer must be valid while the @a parser object
// * exists. The application is responsible for destroying @a input after
// * destroying the @a parser.
// *
// * @param[in,out] parser A parser object.
// * @param[in] input A source data.
// * @param[in] size The length of the source data in bytes.
// */
//
//YAML_DECLARE(void)
//yaml_parser_set_input_string(yaml_parser_t *parser,
//        unsigned char *input, int size);
//
///**
// * Set a file input.
// *
// * @a file should be a file object open for reading. The application is
// * responsible for closing the @a file.
// *
// * @param[in,out] parser A parser object.
// * @param[in] file An open file.
// */
//
//YAML_DECLARE(void)
//yaml_parser_set_input_file(yaml_parser_t *parser, FILE *file);
//
///**
// * Set a generic input handler.
// *
// * @param[in,out] parser A parser object.
+// * @param[in] handler A read handler. +// * @param[in] data Any application data for passing to the read +// * handler. +// */ +// +//YAML_DECLARE(void) +//yaml_parser_set_input(yaml_parser_t *parser, +// yaml_read_handler_t *handler, void *data); +// +///** +// * Set the source encoding. +// * +// * @param[in,out] parser A parser object. +// * @param[in] encoding The source encoding. +// */ +// +//YAML_DECLARE(void) +//yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding); +// +///** +// * Scan the input stream and produce the next token. +// * +// * Call the function subsequently to produce a sequence of tokens corresponding +// * to the input stream. The initial token has the type +// * yaml_STREAM_START_TOKEN while the ending token has the type +// * yaml_STREAM_END_TOKEN. +// * +// * An application is responsible for freeing any buffers associated with the +// * produced token object using the @c yaml_token_delete function. +// * +// * An application must not alternate the calls of yaml_parser_scan() with the +// * calls of yaml_parser_parse() or yaml_parser_load(). Doing this will break +// * the parser. +// * +// * @param[in,out] parser A parser object. +// * @param[out] token An empty token object. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token); +// +///** +// * Parse the input stream and produce the next parsing event. +// * +// * Call the function subsequently to produce a sequence of events corresponding +// * to the input stream. The initial event has the type +// * yaml_STREAM_START_EVENT while the ending event has the type +// * yaml_STREAM_END_EVENT. +// * +// * An application is responsible for freeing any buffers associated with the +// * produced event object using the yaml_event_delete() function. 
+// * +// * An application must not alternate the calls of yaml_parser_parse() with the +// * calls of yaml_parser_scan() or yaml_parser_load(). Doing this will break the +// * parser. +// * +// * @param[in,out] parser A parser object. +// * @param[out] event An empty event object. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_parser_parse(yaml_parser_t *parser, yaml_event_t *event); +// +///** +// * Parse the input stream and produce the next YAML document. +// * +// * Call this function subsequently to produce a sequence of documents +// * constituting the input stream. +// * +// * If the produced document has no root node, it means that the document +// * end has been reached. +// * +// * An application is responsible for freeing any data associated with the +// * produced document object using the yaml_document_delete() function. +// * +// * An application must not alternate the calls of yaml_parser_load() with the +// * calls of yaml_parser_scan() or yaml_parser_parse(). Doing this will break +// * the parser. +// * +// * @param[in,out] parser A parser object. +// * @param[out] document An empty document object. +// * +// * @return @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_parser_load(yaml_parser_t *parser, yaml_document_t *document); +// +//// @} +// +///** +// * @defgroup emitter Emitter Definitions +// * @{ +// */ +// +///** +// * The prototype of a write handler. +// * +// * The write handler is called when the emitter needs to flush the accumulated +// * characters to the output. The handler should write @a size bytes of the +// * @a buffer to the output. +// * +// * @param[in,out] data A pointer to an application data specified by +// * yaml_emitter_set_output(). +// * @param[in] buffer The buffer with bytes to be written. +// * @param[in] size The size of the buffer. +// * +// * @returns On success, the handler should return @c 1. 
If the handler failed, +// * the returned value should be @c 0. +// */ +// +//typedef int yaml_write_handler_t(void *data, unsigned char *buffer, int size); +// +//// The emitter states. +//typedef enum yaml_emitter_state_e { +// // Expect STREAM-START. +// YAML_EMIT_STREAM_START_STATE, +// // Expect the first DOCUMENT-START or STREAM-END. +// YAML_EMIT_FIRST_DOCUMENT_START_STATE, +// // Expect DOCUMENT-START or STREAM-END. +// YAML_EMIT_DOCUMENT_START_STATE, +// // Expect the content of a document. +// YAML_EMIT_DOCUMENT_CONTENT_STATE, +// // Expect DOCUMENT-END. +// YAML_EMIT_DOCUMENT_END_STATE, +// // Expect the first item of a flow sequence. +// YAML_EMIT_FLOW_SEQUENCE_FIRST_ITEM_STATE, +// // Expect an item of a flow sequence. +// YAML_EMIT_FLOW_SEQUENCE_ITEM_STATE, +// // Expect the first key of a flow mapping. +// YAML_EMIT_FLOW_MAPPING_FIRST_KEY_STATE, +// // Expect a key of a flow mapping. +// YAML_EMIT_FLOW_MAPPING_KEY_STATE, +// // Expect a value for a simple key of a flow mapping. +// YAML_EMIT_FLOW_MAPPING_SIMPLE_VALUE_STATE, +// // Expect a value of a flow mapping. +// YAML_EMIT_FLOW_MAPPING_VALUE_STATE, +// // Expect the first item of a block sequence. +// YAML_EMIT_BLOCK_SEQUENCE_FIRST_ITEM_STATE, +// // Expect an item of a block sequence. +// YAML_EMIT_BLOCK_SEQUENCE_ITEM_STATE, +// // Expect the first key of a block mapping. +// YAML_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE, +// // Expect the key of a block mapping. +// YAML_EMIT_BLOCK_MAPPING_KEY_STATE, +// // Expect a value for a simple key of a block mapping. +// YAML_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE, +// // Expect a value of a block mapping. +// YAML_EMIT_BLOCK_MAPPING_VALUE_STATE, +// // Expect nothing. +// YAML_EMIT_END_STATE +//} yaml_emitter_state_t; +// +///** +// * The emitter structure. +// * +// * All members are internal. Manage the structure using the @c yaml_emitter_ +// * family of functions. 
+// */ +// +//typedef struct yaml_emitter_s { +// +// /** +// * @name Error handling +// * @{ +// */ +// +// // Error type. +// yaml_error_type_t error; +// // Error description. +// char *problem; +// +// /** +// * @} +// */ +// +// /** +// * @name Writer stuff +// * @{ +// */ +// +// // Write handler. +// yaml_write_handler_t *write_handler; +// +// // A pointer for passing to the write handler. +// void *write_handler_data; +// +// // Standard (string or file) output data. +// union { +// // String output data. +// struct { +// // The buffer pointer. +// unsigned char *buffer; +// // The buffer size. +// int size; +// // The number of written bytes. +// int *size_written; +// } string; +// +// // File output data. +// FILE *file; +// } output; +// +// // The working buffer. +// struct { +// // The beginning of the buffer. +// yaml_char_t *start; +// // The end of the buffer. +// yaml_char_t *end; +// // The current position of the buffer. +// yaml_char_t *pointer; +// // The last filled position of the buffer. +// yaml_char_t *last; +// } buffer; +// +// // The raw buffer. +// struct { +// // The beginning of the buffer. +// unsigned char *start; +// // The end of the buffer. +// unsigned char *end; +// // The current position of the buffer. +// unsigned char *pointer; +// // The last filled position of the buffer. +// unsigned char *last; +// } raw_buffer; +// +// // The stream encoding. +// yaml_encoding_t encoding; +// +// /** +// * @} +// */ +// +// /** +// * @name Emitter stuff +// * @{ +// */ +// +// // If the output is in the canonical style? +// int canonical; +// // The number of indentation spaces. +// int best_indent; +// // The preferred width of the output lines. +// int best_width; +// // Allow unescaped non-ASCII characters? +// int unicode; +// // The preferred line break. +// yaml_break_t line_break; +// +// // The stack of states. +// struct { +// // The beginning of the stack. +// yaml_emitter_state_t *start; +// // The end of the stack.
+// yaml_emitter_state_t *end; +// // The top of the stack. +// yaml_emitter_state_t *top; +// } states; +// +// // The current emitter state. +// yaml_emitter_state_t state; +// +// // The event queue. +// struct { +// // The beginning of the event queue. +// yaml_event_t *start; +// // The end of the event queue. +// yaml_event_t *end; +// // The head of the event queue. +// yaml_event_t *head; +// // The tail of the event queue. +// yaml_event_t *tail; +// } events; +// +// // The stack of indentation levels. +// struct { +// // The beginning of the stack. +// int *start; +// // The end of the stack. +// int *end; +// // The top of the stack. +// int *top; +// } indents; +// +// // The list of tag directives. +// struct { +// // The beginning of the list. +// yaml_tag_directive_t *start; +// // The end of the list. +// yaml_tag_directive_t *end; +// // The top of the list. +// yaml_tag_directive_t *top; +// } tag_directives; +// +// // The current indentation level. +// int indent; +// +// // The current flow level. +// int flow_level; +// +// // Is it the document root context? +// int root_context; +// // Is it a sequence context? +// int sequence_context; +// // Is it a mapping context? +// int mapping_context; +// // Is it a simple mapping key context? +// int simple_key_context; +// +// // The current line. +// int line; +// // The current column. +// int column; +// // If the last character was a whitespace? +// int whitespace; +// // If the last character was an indentation character (' ', '-', '?', ':')? +// int indention; +// // If an explicit document end is required? +// int open_ended; +// +// // Anchor analysis. +// struct { +// // The anchor value. +// yaml_char_t *anchor; +// // The anchor length. +// int anchor_length; +// // Is it an alias? +// int alias; +// } anchor_data; +// +// // Tag analysis. +// struct { +// // The tag handle. +// yaml_char_t *handle; +// // The tag handle length. +// int handle_length; +// // The tag suffix. 
+// yaml_char_t *suffix; +// // The tag suffix length. +// int suffix_length; +// } tag_data; +// +// // Scalar analysis. +// struct { +// // The scalar value. +// yaml_char_t *value; +// // The scalar length. +// int length; +// // Does the scalar contain line breaks? +// int multiline; +// // Can the scalar be expressed in the flow plain style? +// int flow_plain_allowed; +// // Can the scalar be expressed in the block plain style? +// int block_plain_allowed; +// // Can the scalar be expressed in the single quoted style? +// int single_quoted_allowed; +// // Can the scalar be expressed in the literal or folded styles? +// int block_allowed; +// // The output style. +// yaml_scalar_style_t style; +// } scalar_data; +// +// /** +// * @} +// */ +// +// /** +// * @name Dumper stuff +// * @{ +// */ +// +// // If the stream was already opened? +// int opened; +// // If the stream was already closed? +// int closed; +// +// // The information associated with the document nodes. +// struct { +// // The number of references. +// int references; +// // The anchor id. +// int anchor; +// // If the node has been emitted? +// int serialized; +// } *anchors; +// +// // The last assigned anchor id. +// int last_anchor_id; +// +// // The currently emitted document. +// yaml_document_t *document; +// +// /** +// * @} +// */ +// +//} yaml_emitter_t; +// +///** +// * Initialize an emitter. +// * +// * This function creates a new emitter object. An application is responsible +// * for destroying the object using the yaml_emitter_delete() function. +// * +// * @param[out] emitter An empty emitter object. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_emitter_initialize(yaml_emitter_t *emitter); +// +///** +// * Destroy an emitter. +// * +// * @param[in,out] emitter An emitter object. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_delete(yaml_emitter_t *emitter); +// +///** +// * Set a string output.
+// * +// * The emitter will write the output characters to the @a output buffer of the +// * size @a size. The emitter will set @a size_written to the number of written +// * bytes. If the buffer is smaller than required, the emitter produces the +// * YAML_WRITE_ERROR error. +// * +// * @param[in,out] emitter An emitter object. +// * @param[in] output An output buffer. +// * @param[in] size The buffer size. +// * @param[in] size_written The pointer to save the number of written +// * bytes. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_output_string(yaml_emitter_t *emitter, +// unsigned char *output, int size, size_t *size_written); +// +///** +// * Set a file output. +// * +// * @a file should be a file object open for writing. The application is +// * responsible for closing the @a file. +// * +// * @param[in,out] emitter An emitter object. +// * @param[in] file An open file. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_output_file(yaml_emitter_t *emitter, FILE *file); +// +///** +// * Set a generic output handler. +// * +// * @param[in,out] emitter An emitter object. +// * @param[in] handler A write handler. +// * @param[in] data Any application data for passing to the write +// * handler. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_output(yaml_emitter_t *emitter, +// yaml_write_handler_t *handler, void *data); +// +///** +// * Set the output encoding. +// * +// * @param[in,out] emitter An emitter object. +// * @param[in] encoding The output encoding. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_encoding(yaml_emitter_t *emitter, yaml_encoding_t encoding); +// +///** +// * Set if the output should be in the "canonical" format as in the YAML +// * specification. +// * +// * @param[in,out] emitter An emitter object. +// * @param[in] canonical If the output is canonical. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_canonical(yaml_emitter_t *emitter, int canonical); +// +///** +// * Set the indentation increment.
+// * +// * @param[in,out] emitter An emitter object. +// * @param[in] indent The indentation increment (1 < . < 10). +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_indent(yaml_emitter_t *emitter, int indent); +// +///** +// * Set the preferred line width. @c -1 means unlimited. +// * +// * @param[in,out] emitter An emitter object. +// * @param[in] width The preferred line width. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_width(yaml_emitter_t *emitter, int width); +// +///** +// * Set if unescaped non-ASCII characters are allowed. +// * +// * @param[in,out] emitter An emitter object. +// * @param[in] unicode If unescaped Unicode characters are allowed. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_unicode(yaml_emitter_t *emitter, int unicode); +// +///** +// * Set the preferred line break. +// * +// * @param[in,out] emitter An emitter object. +// * @param[in] line_break The preferred line break. +// */ +// +//YAML_DECLARE(void) +//yaml_emitter_set_break(yaml_emitter_t *emitter, yaml_break_t line_break); +// +///** +// * Emit an event. +// * +// * The event object may be generated using the yaml_parser_parse() function. +// * The emitter takes the responsibility for the event object and destroys its +// * content after it is emitted. The event object is destroyed even if the +// * function fails. +// * +// * @param[in,out] emitter An emitter object. +// * @param[in,out] event An event object. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_emitter_emit(yaml_emitter_t *emitter, yaml_event_t *event); +// +///** +// * Start a YAML stream. +// * +// * This function should be used before yaml_emitter_dump() is called. +// * +// * @param[in,out] emitter An emitter object. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_emitter_open(yaml_emitter_t *emitter); +// +///** +// * Finish a YAML stream. 
+// * +// * This function should be used after yaml_emitter_dump() is called. +// * +// * @param[in,out] emitter An emitter object. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_emitter_close(yaml_emitter_t *emitter); +// +///** +// * Emit a YAML document. +// * +// * The document object may be generated using the yaml_parser_load() function +// * or the yaml_document_initialize() function. The emitter takes the +// * responsibility for the document object and destroys its content after +// * it is emitted. The document object is destroyed even if the function fails. +// * +// * @param[in,out] emitter An emitter object. +// * @param[in,out] document A document object. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_emitter_dump(yaml_emitter_t *emitter, yaml_document_t *document); +// +///** +// * Flush the accumulated characters to the output. +// * +// * @param[in,out] emitter An emitter object. +// * +// * @returns @c 1 if the function succeeded, @c 0 on error. +// */ +// +//YAML_DECLARE(int) +//yaml_emitter_flush(yaml_emitter_t *emitter); +// +//// @} +// +//#ifdef __cplusplus +//} +//#endif +// +//#endif /* #ifndef YAML_H */ +//
diff --git a/yaml_private_h.go b/yaml_private_h.go new file mode 100644 index 0000000..c01c7f3 --- /dev/null +++ b/yaml_private_h.go
// yaml_private_h.go (package goyaml): buffer sizing constants and byte-level
// character classification helpers that operate on UTF-8 encoded input.
//
// Every helper of the form f(b, i) inspects the character whose first byte
// is b[i]. Helpers that recognise multi-byte sequences read b[i+1] and
// b[i+2] only after the preceding bytes matched; none of them bounds-check,
// so indexing past len(b) panics.

const (
	// The size of the input raw buffer.
	input_raw_buffer_size = 16384

	// The size of the input buffer.
	// It should be possible to decode the whole raw buffer.
	input_buffer_size = input_raw_buffer_size * 3

	// The size of the output buffer.
	output_buffer_size = 16384

	// The size of the output raw buffer.
	// It should be possible to encode the whole output buffer.
	output_raw_buffer_size = output_buffer_size*2 + 2

	// The initial capacity of other stacks, queues and strings.
	initial_stack_size  = 16
	initial_queue_size  = 16
	initial_string_size = 16
)

// is_alpha reports whether the character at position i is an ASCII letter,
// a digit, '_', or '-'.
func is_alpha(b []byte, i int) bool {
	c := b[i]
	return (c >= '0' && c <= '9') ||
		(c >= 'A' && c <= 'Z') ||
		(c >= 'a' && c <= 'z') ||
		c == '_' || c == '-'
}

// is_digit reports whether the character at position i is a decimal digit.
func is_digit(b []byte, i int) bool {
	return b[i] >= '0' && b[i] <= '9'
}

// as_digit returns the numeric value of the decimal digit at position i.
// The result is only meaningful when is_digit(b, i) is true.
func as_digit(b []byte, i int) int {
	return int(b[i]) - '0'
}

// is_hex reports whether the character at position i is a hexadecimal digit
// (0-9, A-F or a-f).
func is_hex(b []byte, i int) bool {
	c := b[i]
	return (c >= '0' && c <= '9') ||
		(c >= 'A' && c <= 'F') ||
		(c >= 'a' && c <= 'f')
}

// as_hex returns the numeric value of the hexadecimal digit at position i.
// The result is only meaningful when is_hex(b, i) is true.
func as_hex(b []byte, i int) int {
	c := b[i]
	switch {
	case c >= 'A' && c <= 'F':
		return int(c) - 'A' + 10
	case c >= 'a' && c <= 'f':
		return int(c) - 'a' + 10
	}
	return int(c) - '0'
}

// is_ascii reports whether the byte at position i is a 7-bit ASCII character.
func is_ascii(b []byte, i int) bool {
	return b[i] <= 0x7F
}

// is_printable reports whether the UTF-8 character starting at position i can
// be printed unescaped. Accepted code points are #x0A, #x20-#x7E,
// #xA0-#xD7FF and #xE000-#xFFFD except #xFEFF (the BOM). Everything else,
// including tab, the remaining control characters and all four-byte
// sequences, is rejected.
func is_printable(b []byte, i int) bool {
	switch c := b[i]; {
	case c == 0x0A: // . == #x0A
		return true
	case c >= 0x20 && c <= 0x7E: // #x20 <= . <= #x7E
		return true
	case c == 0xC2: // #xA0 <= . <= #xBF (start of #xA0 <= . <= #xD7FF)
		return b[i+1] >= 0xA0
	case c > 0xC2 && c < 0xED: // #xC0 <= . <= #xCFFF
		return true
	case c == 0xED: // #xD000 <= . <= #xD7FF; excludes the surrogate range
		return b[i+1] < 0xA0
	case c == 0xEE: // #xE000 <= . <= #xEFFF
		return true
	case c == 0xEF: // #xF000 <= . <= #xFFFD
		return !(b[i+1] == 0xBB && b[i+2] == 0xBF) && // && . != #xFEFF
			!(b[i+1] == 0xBF && (b[i+2] == 0xBE || b[i+2] == 0xBF)) // && . != #xFFFE, #xFFFF
	}
	return false
}

// is_z reports whether the character at position i is NUL.
func is_z(b []byte, i int) bool {
	return b[i] == 0x00
}

// is_bom reports whether a UTF-8 byte order mark (EF BB BF) starts at
// position i.
//
// The check is made at i, like every other helper in this file; it used to
// test b[0..2] unconditionally and silently ignored the position argument.
func is_bom(b []byte, i int) bool {
	return b[i] == 0xEF && b[i+1] == 0xBB && b[i+2] == 0xBF
}

// is_space reports whether the character at position i is a space.
func is_space(b []byte, i int) bool {
	return b[i] == ' '
}

// is_tab reports whether the character at position i is a tab.
func is_tab(b []byte, i int) bool {
	return b[i] == '\t'
}

// is_blank reports whether the character at position i is blank
// (space or tab).
func is_blank(b []byte, i int) bool {
	return is_space(b, i) || is_tab(b, i)
}

// is_break reports whether a line break starts at position i. CR, LF, NEL,
// LS and PS are recognised.
func is_break(b []byte, i int) bool {
	return b[i] == '\r' || // CR (#xD)
		b[i] == '\n' || // LF (#xA)
		(b[i] == 0xC2 && b[i+1] == 0x85) || // NEL (#x85)
		(b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8) || // LS (#x2028)
		(b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA9) // PS (#x2029)
}

// is_crlf reports whether the two-byte sequence CR LF starts at position i.
func is_crlf(b []byte, i int) bool {
	return b[i] == '\r' && b[i+1] == '\n'
}

// is_breakz reports whether the character at position i is a line break
// or NUL.
func is_breakz(b []byte, i int) bool {
	return is_break(b, i) || is_z(b, i)
}

// is_spacez reports whether the character at position i is a line break,
// space, or NUL.
func is_spacez(b []byte, i int) bool {
	return is_space(b, i) || is_breakz(b, i)
}

// is_blankz reports whether the character at position i is a line break,
// space, tab, or NUL.
func is_blankz(b []byte, i int) bool {
	return is_blank(b, i) || is_breakz(b, i)
}

// width returns the length in bytes (1 to 4) of the UTF-8 sequence introduced
// by the lead byte b. It returns 0 when b cannot start a sequence, i.e. for
// continuation bytes and for bytes 0xF8 and above.
func width(b byte) int {
	switch {
	case b&0x80 == 0x00:
		return 1
	case b&0xE0 == 0xC0:
		return 2
	case b&0xF0 == 0xE0:
		return 3
	case b&0xF8 == 0xF0:
		return 4
	}
	return 0
}

///*
// * Event initializers.
// */
//
//#define EVENT_INIT(event,event_type,event_start_mark,event_end_mark)
//    (memset(&(event), 0, sizeof(yaml_event_t)),
//     (event).type = (event_type),
//     (event).start_mark = (event_start_mark),
//     (event).end_mark = (event_end_mark))
//

//#define DOCUMENT_END_EVENT_INIT(event,event_implicit,start_mark,end_mark)
//    (EVENT_INIT((event),YAML_DOCUMENT_END_EVENT,(start_mark),(end_mark)),
//     (event).data.document_end.implicit = (event_implicit))
//
//#define ALIAS_EVENT_INIT(event,event_anchor,start_mark,end_mark)
//    (EVENT_INIT((event),YAML_ALIAS_EVENT,(start_mark),(end_mark)),
//     (event).data.alias.anchor = (event_anchor))
//
//#define SCALAR_EVENT_INIT(event,event_anchor,event_tag,event_value,event_length,
//        event_plain_implicit, event_quoted_implicit,event_style,start_mark,end_mark)
//    (EVENT_INIT((event),YAML_SCALAR_EVENT,(start_mark),(end_mark)),
//     (event).data.scalar.anchor = (event_anchor),
//     (event).data.scalar.tag = (event_tag),
//     (event).data.scalar.value = (event_value),
//     (event).data.scalar.length = (event_length),
//     (event).data.scalar.plain_implicit = (event_plain_implicit),
//     (event).data.scalar.quoted_implicit = (event_quoted_implicit),
//     (event).data.scalar.style = (event_style))
//
//#define SEQUENCE_START_EVENT_INIT(event,event_anchor,event_tag,
//        event_implicit,event_style,start_mark,end_mark)
//    (EVENT_INIT((event),YAML_SEQUENCE_START_EVENT,(start_mark),(end_mark)),
//     (event).data.sequence_start.anchor = (event_anchor),
//     (event).data.sequence_start.tag = (event_tag),
//     (event).data.sequence_start.implicit = (event_implicit),
//     (event).data.sequence_start.style = (event_style))
//
//#define SEQUENCE_END_EVENT_INIT(event,start_mark,end_mark)
//
(EVENT_INIT((event),YAML_SEQUENCE_END_EVENT,(start_mark),(end_mark))) +// +//#define MAPPING_START_EVENT_INIT(event,event_anchor,event_tag, +// event_implicit,event_style,start_mark,end_mark) +// (EVENT_INIT((event),YAML_MAPPING_START_EVENT,(start_mark),(end_mark)), +// (event).data.mapping_start.anchor = (event_anchor), +// (event).data.mapping_start.tag = (event_tag), +// (event).data.mapping_start.implicit = (event_implicit), +// (event).data.mapping_start.style = (event_style)) +// +//#define MAPPING_END_EVENT_INIT(event,start_mark,end_mark) +// (EVENT_INIT((event),YAML_MAPPING_END_EVENT,(start_mark),(end_mark))) +// +///* +// * Document initializer. +// */ +// +//#define DOCUMENT_INIT(document,document_nodes_start,document_nodes_end, +// document_version_directive,document_tag_directives_start, +// document_tag_directives_end,document_start_implicit, +// document_end_implicit,document_start_mark,document_end_mark) +// (memset(&(document), 0, sizeof(yaml_document_t)), +// (document).nodes.start = (document_nodes_start), +// (document).nodes.end = (document_nodes_end), +// (document).nodes.top = (document_nodes_start), +// (document).version_directive = (document_version_directive), +// (document).tag_directives.start = (document_tag_directives_start), +// (document).tag_directives.end = (document_tag_directives_end), +// (document).start_implicit = (document_start_implicit), +// (document).end_implicit = (document_end_implicit), +// (document).start_mark = (document_start_mark), +// (document).end_mark = (document_end_mark)) +// +///* +// * Node initializers. 
+// */ +// +//#define NODE_INIT(node,node_type,node_tag,node_start_mark,node_end_mark) +// (memset(&(node), 0, sizeof(yaml_node_t)), +// (node).type = (node_type), +// (node).tag = (node_tag), +// (node).start_mark = (node_start_mark), +// (node).end_mark = (node_end_mark)) +// +//#define SCALAR_NODE_INIT(node,node_tag,node_value,node_length, +// node_style,start_mark,end_mark) +// (NODE_INIT((node),YAML_SCALAR_NODE,(node_tag),(start_mark),(end_mark)), +// (node).data.scalar.value = (node_value), +// (node).data.scalar.length = (node_length), +// (node).data.scalar.style = (node_style)) +// +//#define SEQUENCE_NODE_INIT(node,node_tag,node_items_start,node_items_end, +// node_style,start_mark,end_mark) +// (NODE_INIT((node),YAML_SEQUENCE_NODE,(node_tag),(start_mark),(end_mark)), +// (node).data.sequence.items.start = (node_items_start), +// (node).data.sequence.items.end = (node_items_end), +// (node).data.sequence.items.top = (node_items_start), +// (node).data.sequence.style = (node_style)) +// +//#define MAPPING_NODE_INIT(node,node_tag,node_pairs_start,node_pairs_end, +// node_style,start_mark,end_mark) +// (NODE_INIT((node),YAML_MAPPING_NODE,(node_tag),(start_mark),(end_mark)), +// (node).data.mapping.pairs.start = (node_pairs_start), +// (node).data.mapping.pairs.end = (node_pairs_end), +// (node).data.mapping.pairs.top = (node_pairs_start), +// (node).data.mapping.style = (node_style)) +//