Go - How to create a parser

I want to build a parser but have some problems understanding how to do this.

Sample string I would like to parse

{key1 = value1 | key2 = {key3 = value3} | key4 = {key5 = { key6 = value6 }}}

Preferably I would like to get an output similar to a nested map

map[key1] = value1
map[key2] = (map[key3] = value3)
map[key4] = (map[key5] = (map[key6] = value6))

How could this be done? Am I aiming in the wrong direction?

Writing a parser is a complicated topic that is too big to cover in a single answer.

Rob Pike gave an excellent talk that walks through writing a lexer (which is a half of the parser) in Go: http://www.youtube.com/watch?v=HxaD_trXwRE

You should also look at e.g. parser code in Go standard library for an example on how to do it: http://golang.org/src/pkg/go/parser/parser.go

There are also plenty of resources on parsing on the internet. They might have examples in other languages, but it's just a matter of translating the syntax to Go.

I recommend reading up on recursive descent parsing (e.g. http://www.cs.binghamton.edu/~zdu/parsdemo/recintro.html) or top down parsing (e.g. http://javascript.crockford.com/tdop/tdop.html, http://effbot.org/zone/simple-top-down-parsing.htm).

What about using the standard goyacc tool? Here is a skeleton:

main.y

%{
package main

import (
    "fmt"
    "log"
)
%}

/*
 * Semantic value types. goyacc turns this union into the fields of the
 * generated yySymType struct: val carries KEY/VAL values, pair one key/value
 * entry, pairs the map under construction. The tok field is not referenced
 * by the grammar or the lexer in this file.
 */
%union{
    tok int
    val interface{}
    pair struct{key, val interface{}}
    pairs map[interface{}]interface{}
}

/* Terminal symbols delivered by the lexer. */
%token KEY
%token VAL

/* Which union field holds the semantic value of each symbol. */
%type <val> KEY VAL
%type <pair> pair
%type <pairs> pairs

%%

/*
 * goal: the whole input is one brace-delimited list of pairs. The finished
 * map is stored on the lexer object so that main can read it after parsing.
 */
goal:
    '{' pairs '}'
    {
        yylex.(*lex).m = $2
    }

/*
 * pairs: one or more pair entries separated by '|'. The first alternative
 * starts a new map from a single pair. The second relies on the yacc default
 * of the result starting out as the first component (the map built so far)
 * and adds the new pair to that same map.
 */
pairs:
    pair
    {
        $$ = map[interface{}]interface{}{$1.key: $1.val}
    }
|   pairs '|' pair
    {
        $$[$3.key] = $3.val
    }

/*
 * pair: KEY '=' followed by either a plain VAL or a nested brace-delimited
 * pairs list, whose map becomes the value.
 */
pair:
    KEY '=' VAL
    {
        $$.key, $$.val = $1, $3
    }
|   KEY '=' '{' pairs '}'
    {
        $$.key, $$.val = $1, $4
    }


%%

// token is one pre-lexed input symbol: tok is the token code returned to the
// parser by Lex (KEY, VAL or a literal character) and val is the semantic
// value that Lex copies into the parser's value slot.
type token struct {
    tok int
    val interface{}
}

// lex is the lexer state handed to yyParse. tokens is the queue of symbols
// Lex has not delivered yet; m receives the parsed map from the grammar's
// goal action.
type lex struct {
    tokens []token
    m map[interface{}]interface{}
}

// Lex pops the next queued token, stores its semantic value in lval.val and
// returns its token code. Once the queue is empty it returns 0, which goyacc
// treats as end of input.
func (l *lex) Lex(lval *yySymType) int {
    if len(l.tokens) < 1 {
        return 0
    }

    head := l.tokens[0]
    lval.val, l.tokens = head.val, l.tokens[1:]
    return head.tok
}

// Error reports the parser error message e by logging it with log.Fatal,
// which ends the program. goyacc's yyLexer interface requires this method.
func (l *lex) Error(e string) {
    log.Fatal(e)
}

func main() {
    l := &lex{
        // {key1 = value1 | key2 = {key3 = value3} | key4 = {key5 = { key6 = value6 }}}
        []token{
            {'{', ""},
            {KEY, "key1"},
            {'=', ""},
            {VAL, "value1"},
            {'|', ""},
            {KEY, "key2"},
            {'=', ""}, 
            {'{', ""},
            {KEY, "key3"},
            {'=', ""},
            {VAL, "value3"},
            {'}', ""},
            {'|', ""},
            {KEY, "key4"},
            {'=', ""},
            {'{', ""},
            {KEY, "key5"},
            {'=', ""},
            {'{', ""},
            {KEY, "key6"},
            {'=', ""},
            {VAL, "value6"},
            {'}', ""},
            {'}', ""},
            {'}', ""},
        },
        map[interface{}]interface{}{},
    }
    yyParse(l)
    fmt.Println(l.m)
}

Output

$ go tool yacc -o main.go main.y && go run main.go
map[key4:map[key5:map[key6:value6]] key1:value1 key2:map[key3:value3]]
$ 

That particular format is very similar to json. You could use the following code to leverage that similarity:

    // Sample input in the question's format. Inside the raw string the \n
    // stays a two-character escape, so the scanner sees one single-line
    // quoted string literal (a real line break here would leave the literal
    // unterminated).
    var txt = `{key1 = "\"value1\"\n" | key2 = { key3 = 10 } | key4 = {key5 = { key6 = value6}}}`
    var s scanner.Scanner
    s.Init(strings.NewReader(txt))
    var b []byte

    // Rewrite the token stream into JSON text, one token at a time.
loop:
    for {
        switch tok := s.Scan(); tok {
        case scanner.EOF:
            break loop
        case '|':
            // Pair separator becomes the JSON member separator.
            b = append(b, ',')
        case '=':
            // Key/value separator becomes the JSON name separator.
            b = append(b, ':')
        case scanner.Ident:
            // Bare identifiers (keys and unquoted values) become JSON strings.
            b = append(b, strconv.Quote(s.TokenText())...)
        default:
            // Everything else (braces, numbers, quoted strings) is copied as is.
            b = append(b, s.TokenText()...)
        }
    }

    var m map[string]interface{}
    err := json.Unmarshal(b, &m)
    if err != nil {
        // handle error
    }

    fmt.Printf("%#v\n", m)

Would you like to try a parsec edition for Go? I wrote a rune-based (Unicode-aware) fork of goparsec (https://github.com/sanyaade-buildtools/goparsec), which is available at https://github.com/Dwarfartisan/goparsec .

Haskell's parsec is a powerful tool for building parsers. The first Perl 6 parser, named Pugs, was written with it. My Go edition is not simpler than yacc, but it is easier to use than yacc.

For this example, I wrote the following code:

parser.go

package main

import (
    "fmt"
    psc "github.com/Dwarfartisan/goparsec"
)

// kv is one parsed "key = value" entry. value holds whatever the value
// parser in pair produced: a token string or the result of mapExpr.
type kv struct {
    key   string
    value interface{}
}

// tchar parses a single rune of a bare (unquoted) token: presumably any rune
// other than the delimiters '|', '{', '}', '=' and space (going by the NoneOf
// name; confirm against goparsec).
var tchar = psc.NoneOf("|{}= ")

func escaped(st psc.ParseState) (interface{}, error) {
    _, err := psc.Try(psc.Rune('\\'))(st)
    if err == nil {
        r, err := psc.AnyRune(st)
        if err == nil {
            switch r.(rune) {
            case 't':
                return '\t', nil
            case '"':
                return '"', nil
            case 'n':
                return '
', nil
            case '\\':
                return '\\', nil
            default:
                return nil, st.Trap("Unknown escape \\%r", r)
            }
        } else {
            return nil, err
        }
    } else {
        return psc.NoneOf("\"")(st)
    }
}

// token parses a key or a scalar value in one of two forms: a double-quoted
// string whose content is one or more escaped results, or a bare token made
// of one or more tchar runes. Both forms are bound to psc.ReturnString; pair
// asserts the result to be a string.
var token = psc.Either(
    psc.Between(psc.Rune('"'), psc.Rune('"'),
        psc.Try(psc.Bind(psc.Many1(escaped), psc.ReturnString))),
    psc.Bind(psc.Many1(tchar), psc.ReturnString))

// syms builds a parser for the rune r surrounded by optional spaces: it
// chains Many(Space), Rune(r) and Many(Space) with Bind_, discards their
// result and yields r itself on success.
func syms(r rune) psc.Parser {
    return func(st psc.ParseState) (interface{}, error) {
        leading := psc.Bind_(psc.Many(psc.Space), psc.Rune(r))
        padded := psc.Bind_(leading, psc.Many(psc.Space))
        if _, err := padded(st); err != nil {
            return nil, err
        }
        return r, nil
    }
}

// Space-tolerant parsers for the four punctuation runes of the format.
var (
    lbracket = syms('{')
    rbracket = syms('}')
    eql      = syms('=')
    vbar     = syms('|')
)

// pair parses a single "key = value" entry and returns it as a kv. The key
// is a token; the value is either a token (attempted first, inside Try) or a
// nested mapExpr.
func pair(st psc.ParseState) (interface{}, error) {
    key, err := token(st)
    if err != nil {
        return nil, err
    }

    valueParser := psc.Bind_(eql, psc.Either(psc.Try(token), mapExpr))
    value, err := valueParser(st)
    if err != nil {
        return nil, err
    }

    return kv{key: key.(string), value: value}, nil
}
// pairs parses one or more pair entries separated by vbar ('|'), delegating
// to psc.SepBy1.
// NOTE(review): makeMap's type switch expects the result to be a
// []interface{} of kv values; confirm against SepBy1's return type.
func pairs(st psc.ParseState) (interface{}, error) {
    return psc.SepBy1(pair, vbar)(st)
}
// mapExpr parses a brace-delimited map. It first attempts a single pair
// between the brackets (wrapped in Try); if that fails, the error is dropped
// and a bracketed pairs list is parsed instead.
func mapExpr(st psc.ParseState) (interface{}, error) {
    if single, err := psc.Try(psc.Between(lbracket, rbracket, pair))(st); err == nil {
        return single, nil
    }

    many, err := psc.Between(lbracket, rbracket, pairs)(st)
    if err != nil {
        return nil, err
    }
    return many, nil
}

// makeMap converts a parse result into nested maps.
//
// A string is returned unchanged. A single kv becomes a one-entry map and a
// []interface{} of kv values becomes a map with one entry per element; in
// both cases the values are converted recursively. Any other input yields an
// empty map. A slice element that is not a kv makes the type assertion panic.
func makeMap(data interface{}) interface{} {
    if s, ok := data.(string); ok {
        return s
    }

    result := map[string]interface{}{}
    switch v := data.(type) {
    case kv:
        result[v.key] = makeMap(v.value)
    case []interface{}:
        for i := range v {
            entry := v[i].(kv)
            result[entry.key] = makeMap(entry.value)
        }
    }
    return result
}

// main parses the sample input with mapExpr and prints the nested map built
// from the parse result by makeMap, or the parse error.
func main() {
    // Raw string: the \" and \n escapes reach the parser as written and are
    // decoded by escaped.
    input := `{key1 = "\"value1\"\n" | key2 = { key3 = 10 } | key4 = {key5 = { key6 = value6}}}`
    st := psc.MemoryParseState(input)

    // Parse first, then convert: makeMap works on the parse result, not on
    // the parse state.
    ret, err := mapExpr(st)
    if err != nil {
        fmt.Println(err)
        return
    }
    fmt.Println(makeMap(ret))
}

RUN

go run parser.go

OUTPUT

map[key1:"value1"
  key2:map[key3:10] key4:map[key5:map[key6:value6]]]

This demo includes escapes, tokens, strings and key/value maps. You can build the parser as a package or as an application.

If you are willing to convert your input to a standard JSON format, why create a parser when there are Go libraries that do the heavy lifting for you?

Given the following input file (/Users/lex/dev/go/data/jsoncfgo/fritjof.json):

Input File

{
   "key1": "value1",
   "key2" :  {
      "key3": "value3"
   },
   "key4": {
      "key5": {
         "key6": "value6"
      }
   }
}

Code Example

package main

import (
    "fmt"
    "log"
    "github.com/l3x/jsoncfgo"
)


func main() {

    configPath := "/Users/lex/dev/go/data/jsoncfgo/fritjof.json"
    cfg, err := jsoncfgo.ReadFile(configPath)
    if err != nil {
        log.Fatal(err.Error())  // Handle error here
    }

    key1 := cfg.RequiredString("key1")
    fmt.Printf("key1: %v

", key1)

    key2 := cfg.OptionalObject("key2")
    fmt.Printf("key2: %v

", key2)

    key4 := cfg.OptionalObject("key4")
    fmt.Printf("key4: %v

", key4)

    if err := cfg.Validate(); err != nil {
        defer log.Fatalf("ERROR - Invalid config file...
%v", err)
        return
    }
}

Output

key1: value1

key2: map[key3:value3]

key4: map[key5:map[key6:value6]]

Notes

jsoncfgo can handle any level of nested JSON objects.

For details see:

Be advised that, with Go 1.8 (in beta in Q4 2016, released in Q1 2017), the following applies:

The yacc tool (previously available by running “go tool yacc”) has been removed.
As of Go 1.7 it was no longer used by the Go compiler.

It has moved to the “tools” repository and is now available at golang.org/x/tools/cmd/goyacc.