使用自定义解组器处理嵌套的JSON结构

I'm dealing with a legacy system that's returning JSON with nested structs and some optional fields (and in random order). Something like this:

// A is a simple struct that the default JSON unmarshaller can decode
// without any custom logic.
type A struct {
    // AF1 is read from the JSON key "AF1".
    AF1 string `json:"AF1"`
}

// B is a simple struct that the default JSON unmarshaller can decode
// without any custom logic.
type B struct {
    // BF1 is read from the JSON key "BF1".
    BF1 string `json:"BF1"`
}

// X is the top-level object returned by the legacy system: two mandatory
// lists plus an open-ended set of optional string values.
type X struct {
    Things []A `json:"things"` // mandatory
    Thangs []B `json:"thangs"` // mandatory
    // Some individual string values may or may not appear, e.g.:
    //     Item1 string
    //     Item2 string
    // No fields are declared for them here.
}

If Item[12] do appear, I want to stash them in a map or similar.

Is there any elegant way to unmarshal X? Is there some way to write a custom UnmarshalJSON func for X (to handle the optional string fields), and then hand off to the default JSON unmarshaller for A and B?

If I understand the problem correctly from your additional comment, then the input might contain arbitrary extra fields with unknown names (and types?) and you want/need access to these. If it's just for later re-marshalling then the json.RawMessage type would be of interest.

Ideally encoding/json would have a special tag (like the ",any" encoding/xml tag) that would automatically collect any extra/unreferenced JSON items into either a map[string]interface{} or a map[string]json.RawMessage field. However I couldn't find any such feature nor figure out an obvious way to emulate it with anonymous structs (but I didn't try very hard).

Edit: There is an open issue in the Go project for this feature. Apparently a change was submitted and partially reviewed around Go 1.2 but ended up not getting accepted.

Failing that, there are a couple of ways you can do exactly what you suggest: write a custom (un)marshaller for X and call back into the json package to handle []A and []B.

Here is an example quickly thrown together, there may be better/clearer/safer ways to do this. (Throughout this example, A and B can be arbitrarily complex, perhaps containing types that themselves have custom (un)marshalling methods.)

package main

import (
    "encoding/json"
    "fmt"
)

// A is the element type of X.Things. It has no json tag and no custom
// (un)marshalling methods, so encoding/json handles it with its defaults.
type A struct {
    AF1 string
}

// B is the element type of X.Thangs. It has no json tag and no custom
// (un)marshalling methods, so encoding/json handles it with its defaults.
type B struct {
    BF1 string
}

// X holds the two known lists plus every other top-level JSON key that
// was not known ahead of time.
type X struct {
    Things []A
    Thangs []B

    // Extra collects the unknown keys, decoded into generic values.
    // Or perhaps json.RawMessage if you just
    // want to pass them through.
    // Or map of string/int/etc if the value type is fixed.
    Extra map[string]interface{}
}

// Unmarshal Way 1: call Unmarshal twice on the whole input

// xsub mirrors only the known fields of X. It has no custom unmarshalling
// methods, so decoding into it uses the default struct decoder.
type xsub struct {
    Things []A `json:"things"`
    Thangs []B `json:"thangs"`
}

// _UnmarshalJSON is "Way 1": it decodes the complete input twice, once
// into xsub for the known keys and once into a generic map for the rest.
// Because of the leading underscore its name is not UnmarshalJSON, so it
// does not make *X a json.Unmarshaler; it has to be called explicitly.
//
// x is modified only when both decodes succeed.
func (x *X) _UnmarshalJSON(b []byte) error {
    var (
        known xsub
        rest  map[string]interface{}
    )

    // Pass 1 picks out the known keys, pass 2 grabs everything.
    err := json.Unmarshal(b, &known)
    if err == nil {
        err = json.Unmarshal(b, &rest)
    }
    if err != nil {
        return err
    }

    // The generic map also saw the known keys; drop them so that only
    // the genuinely unknown entries remain.
    for _, key := range [...]string{"things", "thangs"} {
        delete(rest, key)
    }

    x.Extra = rest
    x.Things = known.Things
    x.Thangs = known.Thangs
    return nil
}

// Way 2:

// UnmarshalJSON implements json.Unmarshaler for *X ("Way 2").
//
// The input object is first split into raw per-key values. The mandatory
// "things" and "thangs" keys are then handed back to encoding/json to be
// decoded as []A and []B, and every remaining key is decoded into a
// generic value and stored in x.Extra.
//
// It returns an error if b is not a JSON object, if a mandatory key is
// absent, or if any value fails to decode. Decode errors from
// encoding/json are returned as-is. x is modified only on success.
func (x *X) UnmarshalJSON(b []byte) error {
    // Only partially decode:
    var raw map[string]json.RawMessage
    if err := json.Unmarshal(b, &raw); err != nil {
        return err
    }

    // Now handle the known fields:
    var things []A
    if err := unmarshalMandatory(raw, "things", &things); err != nil {
        return err
    }
    var thangs []B
    if err := unmarshalMandatory(raw, "thangs", &thangs); err != nil {
        return err
    }

    // And the unknown fields. The already handled keys are removed so
    // they are not decoded a second time.
    //
    // If you only needed to keep the json.RawMessage values for use in
    // MarshalJSON, you would store "raw" at this point and stop here.
    //
    // Alternative: make a single extra call, json.Unmarshal(b, &extra),
    // and delete "things"/"thangs" from the result. That is fewer calls
    // but wastes time decoding things/thangs again.
    delete(raw, "things")
    delete(raw, "thangs")

    extra := make(map[string]interface{}, len(raw))
    for k, msg := range raw {
        var v interface{}
        if err := json.Unmarshal(msg, &v); err != nil {
            return err
        }
        extra[k] = v
    }

    // no error, we can store the results
    x.Things = things
    x.Thangs = thangs
    x.Extra = extra
    return nil
}

// unmarshalMandatory decodes fields[key] into dst. A missing key is
// reported by name; without this check json.Unmarshal would receive an
// empty message and fail with "unexpected end of JSON input".
func unmarshalMandatory(fields map[string]json.RawMessage, key string, dst interface{}) error {
    msg, ok := fields[key]
    if !ok {
        return fmt.Errorf("X: missing mandatory field %q", key)
    }
    return json.Unmarshal(msg, dst)
}

// MarshalJSON implements json.Marshaler for X. It emits a single JSON
// object holding every entry of x.Extra plus the "things" and "thangs"
// keys taken from x.Things and x.Thangs. If Extra itself contains one of
// those two keys, the struct field wins.
//
// The output is assembled in a fresh map instead of temporarily writing
// into x.Extra. Writing into x.Extra would panic when it is nil, and the
// cleanup deletes would drop any "things"/"thangs" entry the caller had
// stored there.
func (x X) MarshalJSON() ([]byte, error) {
    out := make(map[string]interface{}, len(x.Extra)+2)
    for k, v := range x.Extra {
        out[k] = v
    }
    out["things"] = x.Things
    out["thangs"] = x.Thangs
    return json.Marshal(out)
}

func main() {
    inputs := []string{
        `{"things": [], "thangs": []}`,

        `
{
    "things": [
    {
        "AF1": "foo"
    },
    {
        "AF1": "bar"
    }
    ],
    "thangs": [
        {
            "BF1": "string value"
        }
    ],
    "xRandomKey":       "not known ahead of time",
    "xAreValueTypesKnown": 172
}`,
    }

    for _, in := range inputs {
        fmt.Printf("
Unmarshal(%q):
", in)
        var x X
        err := json.Unmarshal([]byte(in), &x)
        if err != nil {
            fmt.Println("unmarshal:", err)
        } else {
            fmt.Printf("\tas X: %+v
", x)
            fmt.Printf("\twith map: %v
", x.Extra)
            out, err := json.Marshal(x)
            if err != nil {
                fmt.Println("marshal:", err)
                continue
            }
            fmt.Printf("\tRemarshals to: %s
", out)
        }
    }
}

Run on Playground

Declare Item1/Item2 as map[string]interface{}, the type json.Unmarshal(...) uses for JSON objects. If they are missing from the input they will simply be left as nil:

// X declares the optional items as generic JSON objects. A key that is
// absent from the input leaves the corresponding map nil.
type X struct {
  // ...
  Item1 map[string]interface{}
  Item2 map[string]interface{}
}

Note also that if a field name matches the JSON key name (case-insensitive) then there is no need to include a json:"..." name tag for it.

// A shows a field without a json tag: encoding/json falls back to the
// field name and matches JSON keys against it case-insensitively.
type A struct {
  AF1 string // Will look for keys named "AF1", "af1", etc.
}