I have a JSON file with a single field that takes a huge amount of space when loaded into memory. The other fields are reasonable, but I'm trying to take care not to load that particular field unless I absolutely have to.
{
"Field1": "value1",
"Field2": "value2",
"Field3": "a very very long string that potentially takes a few GB of memory"
}
When reading that file into memory, I'd want to ignore Field3
(because loading it could crash my app). Here's some code that I would assume does that because it uses io streams rather than passing a []byte
type to the Unmarshal
command.
package main
import (
"encoding/json"
"os"
)
func main() {
type MyStruct struct {
Field1 string
Field2 string
}
fi, err := os.Open("myJSONFile.json")
if err != nil {
os.Exit(2)
}
// create an instance and populate
var mystruct MyStruct
err = json.NewDecoder(fi).Decode(&mystruct)
if err != nil {
os.Exit(2)
}
// do some other stuff
}
The issue is that the built-in json.Decoder
type reads the entire file into memory on Decode
before throwing away key-values that don't match the struct
's fields (as has been pointed out on StackOverflow before: link).
Are there any ways of decoding JSON in Go without keeping the entire JSON object in memory?
You could write a custom io.Reader
that you feed to json.Decoder
and that will pre-read your json file and skip that specific field.
The other option is to write your own decoder, more complicated and messy.
//edit it seemed like a fun exercise, so here goes:
type IgnoreField struct {
io.Reader
Field string
buf bytes.Buffer
}
func NewIgnoreField(r io.Reader, field string) *IgnoreField {
return &IgnoreField{
Reader: r,
Field: field,
}
}
func (iF *IgnoreField) Read(p []byte) (n int, err error) {
if n, err = iF.Reader.Read(p); err != nil {
return
}
s := string(p)
fl := `"` + iF.Field + `"`
if i := strings.Index(s, fl); i != -1 {
l := strings.LastIndex(s[0:i], ",")
if l == -1 {
l = i
}
iF.buf.WriteString(s[0:l])
s = s[i+1+len(fl):]
i = strings.Index(s, `"`)
if i != -1 {
s = s[i+1:]
}
for {
i = strings.Index(s, `"`) //end quote
if i != -1 {
s = s[i+1:]
fmt.Println("Skipped")
break
} else {
if n, err = iF.Reader.Read(p); err != nil {
return
}
s = string(p)
}
}
iF.buf.WriteString(s)
}
ln := iF.buf.Len()
if ln >= len(p) {
tmp := iF.buf.Bytes()
iF.buf.Reset()
copy(p, tmp[0:len(p)])
iF.buf.Write(p[len(p):])
ln = len(p)
} else {
copy(p, iF.buf.Bytes())
iF.buf.Reset()
}
return ln, nil
}
func main() {
type MyStruct struct {
Field1 string
Field2 string
}
fi, err := os.Open("myJSONFile.json")
if err != nil {
os.Exit(2)
}
// create an instance and populate
var mystruct MyStruct
err := json.NewDecoder(NewIgnoreField(fi, "Field3")).Decode(&mystruct)
if err != nil {
fmt.Println(err)
}
fmt.Println(mystruct)
}