Move decoding functionality into a separate Decoder.
diff --git a/decoder.go b/decoder.go new file mode 100644 index 0000000..705873c --- /dev/null +++ b/decoder.go
@@ -0,0 +1,80 @@
+package properties
+
+import (
+	"fmt"
+	"io"
+	"io/ioutil"
+)
+
+// Decoder reads a complete properties document from an io.Reader and parses
+// it into a Properties value.
+type Decoder struct {
+	r io.Reader
+}
+
+// Encoding identifies how the raw input bytes are turned into text before
+// they are handed to the parser.
+type Encoding uint
+
+const (
+	// UTF8 passes the input bytes to the parser unchanged.
+	UTF8 Encoding = 1 << iota
+	// ISO_8859_1 maps every input byte to the code point of the same value.
+	ISO_8859_1
+)
+
+// NewDecoder returns a Decoder that reads its input from r.
+func NewDecoder(r io.Reader) *Decoder {
+	return &Decoder{r: r}
+}
+
+// Decode reads the underlying reader to EOF and parses the bytes as
+// ISO-8859-1.
+func (d *Decoder) Decode() (*Properties, error) {
+	return decode(d.r, ISO_8859_1)
+}
+
+// DecodeWithEncoding reads the underlying reader to EOF and parses the bytes
+// using enc. An unsupported enc is reported as an error.
+func (d *Decoder) DecodeWithEncoding(enc Encoding) (*Properties, error) {
+	return decode(d.r, enc)
+}
+
+// decode reads r fully, converts the bytes according to enc and runs the
+// parser on the resulting string.
+func decode(r io.Reader, enc Encoding) (*Properties, error) {
+	buf, err := ioutil.ReadAll(r)
+	if err != nil {
+		return nil, err
+	}
+
+	input, err := convert(buf, enc)
+	if err != nil {
+		return nil, err
+	}
+	return newParser().Parse(input)
+}
+
+// convert turns buf into the string handed to the parser.
+//
+// The Java properties spec says that .properties files must be ISO-8859-1
+// encoded. Since the first 256 unicode code points cover ISO-8859-1 we
+// can convert each byte into a rune and use the resulting string
+// as UTF-8 input for the parser.
+//
+// An unknown enc yields an error instead of a panic so that a bad argument
+// cannot crash the calling program.
+func convert(buf []byte, enc Encoding) (string, error) {
+	switch enc {
+	case UTF8:
+		return string(buf), nil
+	case ISO_8859_1:
+		runes := make([]rune, len(buf))
+		for i, b := range buf {
+			runes[i] = rune(b)
+		}
+		return string(runes), nil
+	default:
+		return "", fmt.Errorf("unsupported encoding %v", enc)
+	}
+}
diff --git a/properties.go b/properties.go index 86e68d0..15e707c 100644 --- a/properties.go +++ b/properties.go
@@ -2,40 +2,10 @@ package properties -import ( - "fmt" - "io" - "io/ioutil" - "unicode/utf8" -) - type Properties struct { m map[string]string } -// Reads bytes fully and parses them as ISO-8859-1. -func NewProperties(r io.Reader) (*Properties, error) { - buf, err := ioutil.ReadAll(r) - if err != nil { - return nil, err - } - - return NewPropertiesFromString(toUtf8(buf)) -} - -func NewPropertiesFromISO8859_1(buf []byte) (*Properties, error) { - return newParser().Parse(toUtf8(buf)) -} - -// Java properties spec says that .properties files must be ISO-8859-1 -// encoded. Therefore, we first convert them to UTF-8 and then parse them. -func NewPropertiesFromString(input string) (*Properties, error) { - if err := isISO8859_1(input); err != nil { - return nil, err - } - return newParser().Parse(input) -} - // returns the value for the given key func (p *Properties) Get(key string) (value string, ok bool) { value, ok = p.m[key] @@ -57,23 +27,3 @@ func (p *Properties) Len() int { return len(p.m) } - -// taken from -// http://stackoverflow.com/questions/13510458/golang-convert-iso8859-1-to-utf8 -func toUtf8(iso8859_1_buf []byte) string { - buf := make([]rune, len(iso8859_1_buf)) - for i, b := range iso8859_1_buf { - buf[i] = rune(b) - } - return string(buf) -} - -func isISO8859_1(s string) error { - for i := 0; i < len(s); i++ { - r, w := utf8.DecodeRuneInString(s[i:]) - if w > 1 || r > 255 { - return fmt.Errorf("invalid ISO-8859-1 input. %s", s) - } - } - return nil -}
diff --git a/properties_test.go b/properties_test.go index 08cfb1e..a0c8102 100644 --- a/properties_test.go +++ b/properties_test.go
@@ -78,10 +78,6 @@ testError(c, "key", "premature EOF") } -func (l *LoadSuite) TestFailWithNonISO8859_1Input(c *C) { - testError(c, "key₡", "invalid ISO-8859-1 input") -} - func (l *LoadSuite) TestFailWithInvalidUnicodeLiteralInKey(c *C) { testError(c, "key\\ugh32 = value", "invalid unicode literal") } @@ -93,7 +89,8 @@ } b.ResetTimer() for i := 0; i < b.N; i++ { - NewPropertiesFromString(input) + d := NewDecoder(strings.NewReader(input)) + d.Decode() } } @@ -108,7 +105,8 @@ // tests key/value pairs for a given input. func testKeyValue(c *C, input string, keyvalues ...string) { - p, err := NewPropertiesFromString(input) + d := NewDecoder(strings.NewReader(input)) + p, err := d.Decode() c.Assert(err, IsNil) c.Assert(p, NotNil) c.Assert(p.Len(), Equals, len(keyvalues)/2) @@ -119,7 +117,8 @@ // tests whether a given input produces a given error message. func testError(c *C, input, msg string) { - _, err := NewPropertiesFromString(input) + d := NewDecoder(strings.NewReader(input)) + _, err := d.Decode() c.Assert(err, NotNil) c.Assert(strings.Contains(err.Error(), msg), Equals, true) }