Support !!binary data. Fixes #31. Fixes #32.
diff --git a/decode.go b/decode.go index 5f00702..442f87d 100644 --- a/decode.go +++ b/decode.go
@@ -1,6 +1,7 @@ package yaml import ( + "encoding/base64" "reflect" "strconv" "time" @@ -216,7 +217,7 @@ var arg interface{} *out = reflect.ValueOf(&arg).Elem() return func() { - *good = setter.SetYAML(tag, arg) + *good = setter.SetYAML(shortTag(tag), arg) } } } @@ -224,7 +225,7 @@ for again { again = false setter, _ := (*out).Interface().(Setter) - if tag != "!!null" || setter != nil { + if tag != yaml_NULL_TAG || setter != nil { if pv := (*out); pv.Kind() == reflect.Ptr { if pv.IsNil() { *out = reflect.New(pv.Type().Elem()).Elem() @@ -240,7 +241,7 @@ var arg interface{} *out = reflect.ValueOf(&arg).Elem() return func() { - *good = setter.SetYAML(tag, arg) + *good = setter.SetYAML(shortTag(tag), arg) } } } @@ -302,10 +303,17 @@ var tag string var resolved interface{} if n.tag == "" && !n.implicit { - tag = "!!str" + tag = yaml_STR_TAG resolved = n.value } else { tag, resolved = resolve(n.tag, n.value) + if tag == yaml_BINARY_TAG { + data, err := base64.StdEncoding.DecodeString(resolved.(string)) + if err != nil { + fail("!!binary value contains invalid base64 data") + } + resolved = string(data) + } } if set := d.setter(tag, &out, &good); set != nil { defer set() @@ -321,7 +329,10 @@ } switch out.Kind() { case reflect.String: - if resolved != nil { + if tag == yaml_BINARY_TAG { + out.SetString(resolved.(string)) + good = true + } else if resolved != nil { out.SetString(n.value) good = true } @@ -413,7 +424,7 @@ } func (d *decoder) sequence(n *node, out reflect.Value) (good bool) { - if set := d.setter("!!seq", &out, &good); set != nil { + if set := d.setter(yaml_SEQ_TAG, &out, &good); set != nil { defer set() } var iface reflect.Value @@ -442,7 +453,7 @@ } func (d *decoder) mapping(n *node, out reflect.Value) (good bool) { - if set := d.setter("!!map", &out, &good); set != nil { + if set := d.setter(yaml_MAP_TAG, &out, &good); set != nil { defer set() } if out.Kind() == reflect.Struct { @@ -543,5 +554,5 @@ } func isMerge(n *node) bool { - return n.kind == scalarNode && n.value == "<<" && (n.implicit == true || n.tag == "!!merge" || n.tag == "tag:yaml.org,2002:merge") + return n.kind == scalarNode && n.value == "<<" && (n.implicit == true || n.tag == yaml_MERGE_TAG) }
diff --git a/decode_test.go b/decode_test.go index 7042908..cb68d4c 100644 --- a/decode_test.go +++ b/decode_test.go
@@ -5,6 +5,7 @@ "gopkg.in/yaml.v1" "math" "reflect" + "strings" "time" ) @@ -386,6 +387,18 @@ "a: 1:1\n", map[string]string{"a": "1:1"}, }, + + // Binary data. + { + "a: !!binary gIGC\n", + map[string]string{"a": "\x80\x81\x82"}, + }, { + "a: !!binary |\n " + strings.Repeat("kJCQ", 17) + "kJ\n CQ\n", + map[string]string{"a": strings.Repeat("\x90", 54)}, + }, { + "a: !!binary |\n " + strings.Repeat("A", 70) + "\n ==\n", + map[string]string{"a": strings.Repeat("\x00", 52)}, + }, } type inlineB struct { @@ -433,12 +446,13 @@ var unmarshalErrorTests = []struct { data, error string }{ - {"v: !!float 'error'", "YAML error: Can't decode !!str 'error' as a !!float"}, + {"v: !!float 'error'", "YAML error: cannot decode !!str `error` as a !!float"}, {"v: [A,", "YAML error: line 1: did not find expected node content"}, {"v:\n- [A,", "YAML error: line 2: did not find expected node content"}, {"a: *b\n", "YAML error: Unknown anchor 'b' referenced"}, {"a: &a\n b: *a\n", "YAML error: Anchor 'a' value contains itself"}, {"value: -", "YAML error: block sequence entries are not allowed in this context"}, + {"a: !!binary ==", "YAML error: !!binary value contains invalid base64 data"}, } func (s *S) TestUnmarshalErrors(c *C) {
diff --git a/emitterc.go b/emitterc.go index 542ffd2..9b3dc4a 100644 --- a/emitterc.go +++ b/emitterc.go
@@ -973,8 +973,8 @@ if bytes.HasPrefix(tag, tag_directive.prefix) { emitter.tag_data.handle = tag_directive.handle emitter.tag_data.suffix = tag[len(tag_directive.prefix):] + return true } - return true } emitter.tag_data.suffix = tag return true @@ -1279,6 +1279,9 @@ for k := 0; k < w; k++ { octet := value[i] i++ + if !put(emitter, '%') { + return false + } c := octet >> 4 if c < 10 {
diff --git a/encode.go b/encode.go index 650b5d4..4a57ee3 100644 --- a/encode.go +++ b/encode.go
@@ -60,6 +60,7 @@ var value interface{} if getter, ok := in.Interface().(Getter); ok { tag, value = getter.GetYAML() + tag = longTag(tag) if value == nil { e.nilv() return @@ -174,7 +175,7 @@ // The base 60 float notation in YAML 1.1 is a terrible idea and is unsupported // in YAML 1.2 and by this package, but these should be marshalled quoted for // the time being for compatibility with other parsers. -func isBase60(s string) (result bool) { +func isBase60Float(s string) (result bool) { // Fast path. if s == "" { return false @@ -184,18 +185,31 @@ return false } // Do the full match. - return base64re.MatchString(s) + return base60float.MatchString(s) } // From http://yaml.org/type/float.html, except the regular expression there // is bogus. In practice parsers do not enforce the "\.[0-9_]*" suffix. -var base64re = regexp.MustCompile(`^[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+(?:\.[0-9_]*)?$`) +var base60float = regexp.MustCompile(`^[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+(?:\.[0-9_]*)?$`) func (e *encoder) stringv(tag string, in reflect.Value) { var style yaml_scalar_style_t s := in.String() - if rtag, _ := resolve("", s); rtag != "!!str" || isBase60(s) { + rtag, rs := resolve("", s) + if rtag == yaml_BINARY_TAG { + if tag == "" || tag == yaml_STR_TAG { + tag = rtag + s = rs.(string) + } else if tag == yaml_BINARY_TAG { + fail("explicitly tagged !!binary data must be base64-encoded") + } else { + fail("cannot marshal invalid UTF-8 data as " + shortTag(tag)) + } + } + if tag == "" && (rtag != yaml_STR_TAG || isBase60Float(s)) { style = yaml_DOUBLE_QUOTED_SCALAR_STYLE + } else if strings.Contains(s, "\n") { + style = yaml_LITERAL_SCALAR_STYLE } else { style = yaml_PLAIN_SCALAR_STYLE } @@ -242,9 +256,6 @@ func (e *encoder) emitScalar(value, anchor, tag string, style yaml_scalar_style_t) { implicit := tag == "" - if !implicit { - style = yaml_PLAIN_SCALAR_STYLE - } e.must(yaml_scalar_event_initialize(&e.event, []byte(anchor), []byte(tag), []byte(value), implicit, implicit, style)) e.emit() }
diff --git a/encode_test.go b/encode_test.go index 3f15b32..9f745c9 100644 --- a/encode_test.go +++ b/encode_test.go
@@ -87,7 +87,7 @@ "v:\n- A\n- B\n", }, { map[string][]string{"v": []string{"A", "B\nC"}}, - "v:\n- A\n- 'B\n\n C'\n", + "v:\n- A\n- |-\n B\n C\n", }, { map[string][]interface{}{"v": []interface{}{"A", 1, map[string][]int{"B": []int{2, 3}}}}, "v:\n- A\n- 1\n- B:\n - 2\n - 3\n", @@ -232,6 +232,27 @@ map[string]string{"a": "1:1"}, "a: \"1:1\"\n", }, + + // Binary data. + { + map[string]string{"a": "\x00"}, + "a: \"\\0\"\n", + }, { + map[string]string{"a": "\x80\x81\x82"}, + "a: !!binary gIGC\n", + }, { + map[string]string{"a": strings.Repeat("\x90", 54)}, + "a: !!binary |\n " + strings.Repeat("kJCQ", 17) + "kJ\n CQ\n", + }, { + map[string]interface{}{"a": typeWithGetter{"!!str", "\x80\x81\x82"}}, + "a: !!binary gIGC\n", + }, + + // Escaping of tags. + { + map[string]interface{}{"a": typeWithGetter{"foo!bar", 1}}, + "a: !<foo%21bar> 1\n", + }, } func (s *S) TestMarshal(c *C) { @@ -247,12 +268,17 @@ error string panic string }{{ - &struct { + value: &struct { B int inlineB ",inline" }{1, inlineB{2, inlineC{3}}}, - "", - `Duplicated key 'b' in struct struct \{ B int; .*`, + panic: `Duplicated key 'b' in struct struct \{ B int; .*`, +}, { + value: typeWithGetter{"!!binary", "\x80"}, + error: "YAML error: explicitly tagged !!binary data must be base64-encoded", +}, { + value: typeWithGetter{"!!float", "\x80"}, + error: `YAML error: cannot marshal invalid UTF-8 data as !!float`, }} func (s *S) TestMarshalErrors(c *C) {
diff --git a/resolve.go b/resolve.go index ae2f5c2..06c698a 100644 --- a/resolve.go +++ b/resolve.go
@@ -1,9 +1,12 @@ package yaml import ( + "encoding/base64" + "fmt" "math" "strconv" "strings" + "unicode/utf8" ) // TODO: merge, timestamps, base 60 floats, omap. @@ -33,18 +36,18 @@ tag string l []string }{ - {true, "!!bool", []string{"y", "Y", "yes", "Yes", "YES"}}, - {true, "!!bool", []string{"true", "True", "TRUE"}}, - {true, "!!bool", []string{"on", "On", "ON"}}, - {false, "!!bool", []string{"n", "N", "no", "No", "NO"}}, - {false, "!!bool", []string{"false", "False", "FALSE"}}, - {false, "!!bool", []string{"off", "Off", "OFF"}}, - {nil, "!!null", []string{"~", "null", "Null", "NULL"}}, - {math.NaN(), "!!float", []string{".nan", ".NaN", ".NAN"}}, - {math.Inf(+1), "!!float", []string{".inf", ".Inf", ".INF"}}, - {math.Inf(+1), "!!float", []string{"+.inf", "+.Inf", "+.INF"}}, - {math.Inf(-1), "!!float", []string{"-.inf", "-.Inf", "-.INF"}}, - {"<<", "!!merge", []string{"<<"}}, + {true, yaml_BOOL_TAG, []string{"y", "Y", "yes", "Yes", "YES"}}, + {true, yaml_BOOL_TAG, []string{"true", "True", "TRUE"}}, + {true, yaml_BOOL_TAG, []string{"on", "On", "ON"}}, + {false, yaml_BOOL_TAG, []string{"n", "N", "no", "No", "NO"}}, + {false, yaml_BOOL_TAG, []string{"false", "False", "FALSE"}}, + {false, yaml_BOOL_TAG, []string{"off", "Off", "OFF"}}, + {nil, yaml_NULL_TAG, []string{"", "~", "null", "Null", "NULL"}}, + {math.NaN(), yaml_FLOAT_TAG, []string{".nan", ".NaN", ".NAN"}}, + {math.Inf(+1), yaml_FLOAT_TAG, []string{".inf", ".Inf", ".INF"}}, + {math.Inf(+1), yaml_FLOAT_TAG, []string{"+.inf", "+.Inf", "+.INF"}}, + {math.Inf(-1), yaml_FLOAT_TAG, []string{"-.inf", "-.Inf", "-.INF"}}, + {"<<", yaml_MERGE_TAG, []string{"<<"}}, } m := resolveMap @@ -58,90 +61,130 @@ const longTagPrefix = "tag:yaml.org,2002:" func shortTag(tag string) string { + // TODO This can easily be made faster and produce less garbage. if strings.HasPrefix(tag, longTagPrefix) { return "!!" + tag[len(longTagPrefix):] } return tag } +func longTag(tag string) string { + if strings.HasPrefix(tag, "!!") { + return longTagPrefix + tag[2:] + } + return tag +} + func resolvableTag(tag string) bool { switch tag { - case "", "!!str", "!!bool", "!!int", "!!float", "!!null": + case "", yaml_STR_TAG, yaml_BOOL_TAG, yaml_INT_TAG, yaml_FLOAT_TAG, yaml_NULL_TAG: return true } return false } func resolve(tag string, in string) (rtag string, out interface{}) { - tag = shortTag(tag) if !resolvableTag(tag) { return tag, in } defer func() { - if tag != "" && tag != rtag { - fail("Can't decode " + rtag + " '" + in + "' as a " + tag) + switch tag { + case "", rtag, yaml_STR_TAG, yaml_BINARY_TAG: + return } + fail(fmt.Sprintf("cannot decode %s `%s` as a %s", shortTag(rtag), in, shortTag(tag))) }() - if in == "" { - return "!!null", nil + // Any data is accepted as a !!str or !!binary. + // Otherwise, the prefix is enough of a hint about what it might be. + hint := byte('N') + if in != "" { + hint = resolveTable[in[0]] } - - c := resolveTable[in[0]] - if c == 0 { - // It's a string for sure. Nothing to do. - return "!!str", in - } - - // Handle things we can lookup in a map. - if item, ok := resolveMap[in]; ok { - return item.tag, item.value - } - - switch c { - case 'M': - // We've already checked the map above. - - case '.': - // Not in the map, so maybe a normal float. - floatv, err := strconv.ParseFloat(in, 64) - if err == nil { - return "!!float", floatv + if hint != 0 && tag != yaml_STR_TAG && tag != yaml_BINARY_TAG { + // Handle things we can lookup in a map. + if item, ok := resolveMap[in]; ok { + return item.tag, item.value } - // XXX Handle base 60 floats here (WTF!) - case 'D', 'S': - // Int, float, or timestamp. - plain := strings.Replace(in, "_", "", -1) - intv, err := strconv.ParseInt(plain, 0, 64) - if err == nil { - if intv == int64(int(intv)) { - return "!!int", int(intv) - } else { - return "!!int", intv - } - } - floatv, err := strconv.ParseFloat(plain, 64) - if err == nil { - return "!!float", floatv - } - if strings.HasPrefix(plain, "0b") { - intv, err := strconv.ParseInt(plain[2:], 2, 64) + // Base 60 floats are a bad idea, were dropped in YAML 1.2, and + // are purposefully unsupported here. They're still quoted on + // the way out for compatibility with other parser, though. + + switch hint { + case 'M': + // We've already checked the map above. + + case '.': + // Not in the map, so maybe a normal float. + floatv, err := strconv.ParseFloat(in, 64) if err == nil { - return "!!int", int(intv) + return yaml_FLOAT_TAG, floatv } - } else if strings.HasPrefix(plain, "-0b") { - intv, err := strconv.ParseInt(plain[3:], 2, 64) - if err == nil { - return "!!int", -int(intv) - } - } - // XXX Handle timestamps here. - default: - panic("resolveTable item not yet handled: " + - string([]byte{c}) + " (with " + in + ")") + case 'D', 'S': + // Int, float, or timestamp. + plain := strings.Replace(in, "_", "", -1) + intv, err := strconv.ParseInt(plain, 0, 64) + if err == nil { + if intv == int64(int(intv)) { + return yaml_INT_TAG, int(intv) + } else { + return yaml_INT_TAG, intv + } + } + floatv, err := strconv.ParseFloat(plain, 64) + if err == nil { + return yaml_FLOAT_TAG, floatv + } + if strings.HasPrefix(plain, "0b") { + intv, err := strconv.ParseInt(plain[2:], 2, 64) + if err == nil { + return yaml_INT_TAG, int(intv) + } + } else if strings.HasPrefix(plain, "-0b") { + intv, err := strconv.ParseInt(plain[3:], 2, 64) + if err == nil { + return yaml_INT_TAG, -int(intv) + } + } + // XXX Handle timestamps here. + + default: + panic("resolveTable item not yet handled: " + string(rune(hint)) + " (with " + in + ")") + } } - return "!!str", in + if tag == yaml_BINARY_TAG { + return yaml_BINARY_TAG, in + } + if utf8.ValidString(in) { + return yaml_STR_TAG, in + } + return yaml_BINARY_TAG, encodeBase64(in) +} + +// encodeBase64 encodes s as base64 that is broken up into multiple lines +// as appropriate for the resulting length. +func encodeBase64(s string) string { + const lineLen = 70 + encLen := base64.StdEncoding.EncodedLen(len(s)) + lines := encLen/lineLen + 1 + buf := make([]byte, encLen*2+lines) + in := buf[0:encLen] + out := buf[encLen:] + base64.StdEncoding.Encode(in, []byte(s)) + k := 0 + for i := 0; i < len(in); i += lineLen { + j := i + lineLen + if j > len(in) { + j = len(in) + } + k += copy(out[k:], in[i:j]) + if lines > 1 { + out[k] = '\n' + k++ + } + } + return string(out[:k]) }
diff --git a/yamlh.go b/yamlh.go index 6624d6c..4b020b1 100644 --- a/yamlh.go +++ b/yamlh.go
@@ -294,6 +294,10 @@ yaml_SEQ_TAG = "tag:yaml.org,2002:seq" // The tag !!seq is used to denote sequences. yaml_MAP_TAG = "tag:yaml.org,2002:map" // The tag !!map is used to denote mapping. + // Not in original libyaml. + yaml_BINARY_TAG = "tag:yaml.org,2002:binary" + yaml_MERGE_TAG = "tag:yaml.org,2002:merge" + yaml_DEFAULT_SCALAR_TAG = yaml_STR_TAG // The default scalar tag is !!str. yaml_DEFAULT_SEQUENCE_TAG = yaml_SEQ_TAG // The default sequence tag is !!seq. yaml_DEFAULT_MAPPING_TAG = yaml_MAP_TAG // The default mapping tag is !!map.