Initial commit
diff --git a/lex.go b/lex.go new file mode 100644 index 0000000..4b9b41a --- /dev/null +++ b/lex.go
@@ -0,0 +1,295 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package properties + +import ( + "fmt" + // "log" + "strings" + // "unicode" + "strconv" + "unicode/utf8" +) + +// item represents a token or text string returned from the scanner. +type item struct { + typ itemType // The type of this item. + pos Pos // The starting position, in bytes, of this item in the input string. + val string // The value of this item. +} + +func (i item) String() string { + switch { + case i.typ == itemEOF: + return "EOF" + case i.typ == itemError: + return i.val + case len(i.val) > 10: + return fmt.Sprintf("%.10q...", i.val) + } + return fmt.Sprintf("%q", i.val) +} + +type Pos int + +// itemType identifies the type of lex items. +type itemType int + +const ( + itemError itemType = iota // error occurred; value is text of error + itemEOF + itemDelim // a = or : delimiter char + itemKey // a key + itemValue // a value +) + +const eof = -1 + +// stateFn represents the state of the scanner as a function that returns the next state. +type stateFn func(*lexer) stateFn + +// lexer holds the state of the scanner. +type lexer struct { + input string // the string being scanned + state stateFn // the next lexing function to enter + pos Pos // current position in the input + start Pos // start position of this item + width Pos // width of last rune read from input + lastPos Pos // position of most recent item returned by nextItem + items chan item // channel of scanned items +} + +// next returns the next rune in the input. +func (l *lexer) next() rune { + if int(l.pos) >= len(l.input) { + l.width = 0 + return eof + } + r, w := utf8.DecodeRuneInString(l.input[l.pos:]) + l.width = Pos(w) + l.pos += l.width + return r +} + +// peek returns but does not consume the next rune in the input. 
+func (l *lexer) peek() rune { + r := l.next() + l.backup() + return r +} + +// backup steps back one rune. Can only be called once per call of next. +func (l *lexer) backup() { + l.pos -= l.width +} + +// emit passes an item back to the client. +func (l *lexer) emit(t itemType) { + l.emitWithValue(t, l.input[l.start:l.pos]) +} + +// emitWithValue passes an item with a specific value back to the client. +func (l *lexer) emitWithValue(t itemType, value string) { + item := item{t, l.start, value} + // log.Printf("lex.emit: %s", item) + l.items <- item + l.start = l.pos +} + +// ignore skips over the pending input before this point. +func (l *lexer) ignore() { + l.start = l.pos +} + +// accept consumes the next rune if it's from the valid set. +func (l *lexer) accept(valid string) bool { + if strings.IndexRune(valid, l.next()) >= 0 { + return true + } + l.backup() + return false +} + +// acceptRun consumes a run of runes from the valid set. +func (l *lexer) acceptRun(valid string) { + for strings.IndexRune(valid, l.next()) >= 0 { + } + l.backup() +} + +// accept until consumes runes until a termination rune. +func (l *lexer) acceptUntil(term rune) { + for r := l.next(); r != eof && r != term; { + } +} + +// hasText returns true if the current parsed text is not empty. +func (l *lexer) isNotEmpty() bool { + return l.pos > l.start +} + +// lineNumber reports which line we're on, based on the position of +// the previous item returned by nextItem. Doing it this way +// means we don't have to worry about peek double counting. +func (l *lexer) lineNumber() int { + return 1 + strings.Count(l.input[:l.lastPos], "\n") +} + +// errorf returns an error token and terminates the scan by passing +// back a nil pointer that will be the next state, terminating l.nextItem. +func (l *lexer) errorf(format string, args ...interface{}) stateFn { + l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} + return nil +} + +// nextItem returns the next item from the input. 
+func (l *lexer) nextItem() item { + item := <-l.items + l.lastPos = item.pos + return item +} + +// lex creates a new scanner for the input string. +func lex(input string) *lexer { + l := &lexer{ + input: input, + items: make(chan item), + } + go l.run() + return l +} + +// run runs the state machine for the lexer. +func (l *lexer) run() { + for l.state = lexKey(l); l.state != nil; { + l.state = l.state(l) + } +} + +// state functions +// TODO: handle comments +// TODO: handle multi-line values +// TODO: handle unicode literals + +// lexKey scans the key up to a delimiter +func lexKey(l *lexer) stateFn { + if l.peek() == eof { + l.emit(itemEOF) + return nil + } + + runes := make([]rune, 0, 32) + +Loop: + for { + switch r := l.next(); { + + case r == '\\': + switch r = l.next(); { + + // escaped key termination chars + case r == ' ' || r == ':' || r == '=': + runes = append(runes, r) + + // unicode literals + case r == 'u' || r == 'U': + r, err := scanUnicodeLiteral(l) + if err != nil { + return l.errorf(err.Error()) + } + runes = append(runes, r) + + // EOF + case r == eof: + return l.errorf("premature EOF") + + // everything else is an error + default: + return l.errorf("invalid escape sequence %s", string(r)) + } + + // terminate the key (same as escapes above) + case r == ' ' || r == ':' || r == '=': + l.backup() + break Loop + + case r == eof: + return l.errorf("premature EOF") + + default: + runes = append(runes, r) + } + } + + if len(runes) > 0 { + l.emitWithValue(itemKey, string(runes)) + } + + // ignore trailing spaces + l.acceptRun(" ") + l.ignore() + + return lexDelim +} + +// lexDelim scans the delimiter. We expect to be just before the delimiter +func lexDelim(l *lexer) stateFn { + if l.next() == eof { + return l.errorf("premature EOF") + } + l.emit(itemDelim) + return lexValue +} + +// lexValue scans text until the end of the line. 
We expect to be just after the delimiter +func lexValue(l *lexer) stateFn { + // ignore leading spaces + l.acceptRun(" ") + l.ignore() + + runes := make([]rune, 0, 128) + for { + switch r := l.next(); { + // TODO: handle multiline with indent on subsequent lines + // TODO: handle unicode literals \uXXXX and \Uxxxx + // TODO: handle escaped chars \n, \r, \t and \\ + case r == '\n': + l.emitWithValue(itemValue, string(runes)) + + // ignore the new line + l.ignore() + return lexKey + + case r == eof: + l.emitWithValue(itemValue, string(runes)) + l.emit(itemEOF) + return nil + + default: + runes = append(runes, r) + } + } +} + +// scans the digits of the unicode literal in \uXXXX form. +// We expect to be before the first digit +func scanUnicodeLiteral(l *lexer) (rune, error) { + d := make([]rune, 4) + for i := 0; i < 4; i++ { + d[i] = l.next() + if d[i] == eof { + return eof, nil + } + } + + u := string(d) + s, err := strconv.Unquote(fmt.Sprintf("'\\u%s'", u)) + if err != nil { + return 0, fmt.Errorf("invalid unicode literal %s", u) + } + + r, _ := utf8.DecodeRuneInString(s) + return r, nil +}
diff --git a/parser.go b/parser.go new file mode 100644 index 0000000..c2a6e35 --- /dev/null +++ b/parser.go
@@ -0,0 +1,80 @@ +package properties + +import ( + "fmt" + // "log" + "runtime" +) + +type parser struct { + lex *lexer +} + +func newParser() *parser { + return &parser{} +} + +func (p *parser) Parse(input string) (props *Properties, err error) { + // log.Printf("Parsing input '%s'", input) + + defer p.recover(&err) + p.lex = lex(input) + props = &Properties{m:make(map[string]string)} + + for { + token := p.expectOneOf(itemKey, itemEOF) + if token.typ == itemEOF { + break + } + key := token.val + p.expect(itemDelim) + token = p.expect(itemValue) + props.Set(key, token.val) + } + + return props, nil +} + +func (p *parser) errorf(format string, args ...interface{}) { + format = fmt.Sprintf("properties: Line %d: %s", p.lex.lineNumber(), format) + panic(fmt.Errorf(format, args...)) +} + +func (p *parser) expect(expected itemType) (token item) { + token = p.lex.nextItem() + if token.typ != expected { + p.unexpected(token) + } + return token +} + +func (p *parser) expectOneOf(expected1, expected2 itemType) (token item) { + token = p.lex.nextItem() + if token.typ != expected1 && token.typ != expected2 { + p.unexpected(token) + } + return token +} + +func (p *parser) unexpected(token item) { + p.errorf(token.String()) +} + +// recover is the handler that turns panics into returns from the top level of Parse. +func (p *parser) recover(errp *error) { + e := recover() + if e != nil { + if _, ok := e.(runtime.Error); ok { + panic(e) + } + // if p != nil { + // p.stopParse() + // } + *errp = e.(error) + } + return +} + +// func (p *parser) stopParse() { + +// }
diff --git a/properties.go b/properties.go new file mode 100644 index 0000000..2b2a5ef --- /dev/null +++ b/properties.go
@@ -0,0 +1,77 @@ +package properties + +import ( + "fmt" + "io" + "io/ioutil" + "unicode/utf8" +) + +type Properties struct { + m map[string]string +} + +// Reads bytes fully and parses them as ISO-8859-1. +func NewProperties(r io.Reader) (*Properties, error) { + buf, err := ioutil.ReadAll(r) + if err != nil { + return nil, err + } + + return NewPropertiesFromString(toUtf8(buf)) +} + +func NewPropertiesFromISO8859_1(buf []byte) (*Properties, error) { + return newParser().Parse(toUtf8(buf)) +} + +// Java properties spec says that .properties files must be ISO-8859-1 +// encoded. Therefore, we first convert them to UTF-8 and then parse them. +func NewPropertiesFromString(input string) (*Properties, error) { + if err := isISO8859_1(input); err != nil { + return nil, err + } + return newParser().Parse(input) +} + +// returns the value for the given key +func (p *Properties) Get(key string) (value string, ok bool) { + value, ok = p.m[key] + return value, ok +} + +// sets the property key = value and returns the previous value if exists or an empty string +func (p *Properties) Set(key, value string) (prevValue string) { + prevValue, ok := p.m[key] + if !ok { + prevValue = "" + } + + p.m[key] = value + return prevValue +} + +// returns the number of keys +func (p *Properties) Len() int { + return len(p.m) +} + +// taken from +// http://stackoverflow.com/questions/13510458/golang-convert-iso8859-1-to-utf8 +func toUtf8(iso8859_1_buf []byte) string { + buf := make([]rune, len(iso8859_1_buf)) + for i, b := range iso8859_1_buf { + buf[i] = rune(b) + } + return string(buf) +} + +func isISO8859_1(s string) error { + for i := 0; i < len(s); i++ { + r, w := utf8.DecodeRuneInString(s[i:]) + if w > 1 || r > 255 { + return fmt.Errorf("invalid ISO-8859-1 input. %s", s) + } + } + return nil +}
diff --git a/properties_test.go b/properties_test.go new file mode 100644 index 0000000..b8ce5dd --- /dev/null +++ b/properties_test.go
package properties

import (
	"fmt"
	"strings"
	"testing"

	. "github.scm.corp.ebay.com/ecg-marktplaats/cas-go/third_party/launchpad.net/gocheck"
)

// Test hooks the gocheck suites into the standard "go test" runner.
func Test(t *testing.T) { TestingT(t) }

// LoadSuite groups the parsing tests for the properties package.
type LoadSuite struct{}

var _ = Suite(&LoadSuite{})

func (l *LoadSuite) TestKeyWithEmptyValue(c *C) {
	testAllDelimiterCombinations(c, "key", "")
}

func (l *LoadSuite) TestOneKeyValue(c *C) {
	testAllDelimiterCombinations(c, "key", "value")
}

// Trailing spaces in a value are significant and must be preserved.
func (l *LoadSuite) TestValueWithTrailingSpaces(c *C) {
	testAllDelimiterCombinations(c, "key", "value ")
}

// Escaped ' ', ':' and '=' must become part of the key rather than
// terminating it.
func (l *LoadSuite) TestEscapedCharsInKey(c *C) {
	testKeyValue(c, "k\\ e\\:y\\= = value", "k e:y=", "value")
}

// Both \uXXXX and \UXXXX spellings decode to the same code point.
func (l *LoadSuite) TestUnicodeLiteralInKey(c *C) {
	testKeyValue(c, "key\\u2318 = value", "key⌘", "value")
	testKeyValue(c, "key\\U2318 = value", "key⌘", "value")
}

// func (l *LoadSuite) TestEscapedCharsInValue(c *C) {
// 	testKeyValue(c, "key = v\\ a\\:lu\\=e", "key", "v a:lu=e")
// }

// func (l *LoadSuite) TestMultilineValue(c *C) {
// 	input := "key = valueA,\\\n valueB"
// 	testKeyValue(c, input, "key", "valueA,valueB")
// }

// Input that ends before a delimiter is a parse error.
func (l *LoadSuite) TestFailWithPrematureEOF(c *C) {
	_, err := NewPropertiesFromString("key")
	c.Assert(err, NotNil)
	c.Assert(strings.Contains(err.Error(), "premature EOF"), Equals, true)
}

// Code points outside ISO-8859-1 (here U+20A1) are rejected up front.
func (l *LoadSuite) TestFailWithNonISO8859_1Input(c *C) {
	_, err := NewPropertiesFromString("key₡")
	c.Assert(err, NotNil)
	c.Assert(strings.Contains(err.Error(), "invalid ISO-8859-1 input"), Equals, true)
}

// Non-hex digits after \u are a parse error.
func (l *LoadSuite) TestFailWithInvalidUnicodeLiteralInKey(c *C) {
	_, err := NewPropertiesFromString("key\\ugh32 = value")
	c.Assert(err, NotNil)
	c.Assert(strings.Contains(err.Error(), "invalid unicode literal"), Equals, true)
}

// tests all combinations of delimiters plus leading and/or trailing spaces.
func testAllDelimiterCombinations(c *C, key, value string) {
	// '=' and ':' with every combination of surrounding spaces, each run
	// once without and once with a trailing newline.
	delimiters := []string{"=", " =", "= ", " = ", ":", " :", ": ", " : "}
	for _, delim := range delimiters {
		testKeyValue(c, fmt.Sprintf("%s%s%s", key, delim, value), key, value)
		testKeyValue(c, fmt.Sprintf("%s%s%s\n", key, delim, value), key, value)
	}
}

// tests a single key/value combination for a given input.
func testKeyValue(c *C, input, key, value string) {
	// fmt.Printf("Testing '%s'\n", input)
	p, err := NewPropertiesFromString(input)
	c.Assert(err, IsNil)
	c.Assert(p, NotNil)
	c.Assert(p.Len(), Equals, 1)
	assertKeyValue(c, p, key, value)
}

// asserts that key exists in p and maps to value.
func assertKeyValue(c *C, p *Properties, key, value string) {
	v, ok := p.Get(key)
	c.Assert(ok, Equals, true)
	c.Assert(v, Equals, value)
}