// MyStruct is the decoding target for a JSON object that has a "value" key.
type MyStruct struct {
	// Value keeps the bytes of the "value" member exactly as they appear in
	// the input: json.RawMessage defers decoding, so escape sequences such
	// as \u003c are NOT translated into the characters they denote.
	Value json.RawMessage `json:"value"`
}
// Send the request and, on a 200 response, decode the body into mystruct.
// NOTE(review): req, err and mystruct are declared outside this fragment;
// mystruct is presumably a *MyStruct — confirm against the enclosing function.
var resp *http.Response
if resp, err = http.DefaultClient.Do(req); err == nil {
	// The body must be closed on every path, otherwise the underlying
	// connection is leaked and cannot be reused by the transport.
	defer resp.Body.Close()
	if resp.StatusCode == http.StatusOK {
		var buffer []byte
		if buffer, err = ioutil.ReadAll(resp.Body); err == nil {
			mystruct = &MyStruct{}
			err = json.Unmarshal(buffer, mystruct)
		}
	}
}
// mystruct is only assigned after a successful read of a 200 response, so
// guard the dereference instead of panicking on a failed request.
if mystruct != nil {
	fmt.Println(string(mystruct.Value))
}
it produces something like:
\u003Chead>
\u003C/head>
\u003Cbody>
Doc at: http://golang.org/pkg/encoding/json/#Unmarshal
says: When unmarshaling quoted strings, invalid UTF-8 or invalid UTF-16 surrogate pairs are not treated as an error. Instead, they are replaced by the Unicode replacement character U+FFFD.
I kinda think this is what is going on. I just can't see the answer as my experience with go is minimal and I'm tired.
You decided to use json.RawMessage to prevent parsing of the value stored under the key "value" in your JSON message.
The string literal "\u003chtml\u003e"
is a valid json representation of "<html>"
.
Since you told json.Unmarshal
not to parse this part, it does not parse it and returns it to you as-is.
If you want to have it parsed into a UTF-8 string, then change the definition of MyStruct to:
// MyStruct is the decoding target for a JSON object that has a "value" key.
type MyStruct struct {
	// Value receives the fully decoded string stored under "value"; because
	// the field is a plain string, json.Unmarshal parses the JSON string
	// literal, turning escapes such as \u003c into the characters they denote.
	Value string `json:"value"`
}
There is a way to convert the escaped Unicode characters in a json.RawMessage into plain UTF-8 characters without unmarshalling it. (I had to deal with this issue because my primary language is Korean.)
You can use strconv.Quote() and strconv.Unquote() to do the conversion.
// _UnescapeUnicodeCharactersInJSON returns a copy of _jsonRaw in which every
// JSON \uXXXX escape is replaced by the UTF-8 encoding of the character it
// denotes. All other bytes, including other escapes such as \\ or \n, are
// copied through unchanged, and the input is not unmarshalled.
//
// A high/low UTF-16 surrogate pair (\uD83D\uDE00) is combined into a single
// code point. Escapes whose decoded form would not be legal inside a JSON
// string are left escaped so the result stays valid JSON: control characters
// below U+0020, the double quote, the backslash, and unpaired surrogates.
//
// An error wrapping strconv.ErrSyntax is returned when a \u is not followed
// by exactly four hexadecimal digits; the returned message is nil in that case.
func _UnescapeUnicodeCharactersInJSON(_jsonRaw json.RawMessage) (json.RawMessage, error) {
	const (
		highStart = 0xD800 // first high (leading) surrogate
		lowStart  = 0xDC00 // first low (trailing) surrogate
		surrEnd   = 0xE000 // one past the last low surrogate
	)

	// hex4 decodes the \uXXXX escape that starts at s[i]. It reports false
	// when s[i:] does not begin with a complete, well-formed escape.
	hex4 := func(s string, i int) (rune, bool) {
		if i+6 > len(s) || s[i] != '\\' || s[i+1] != 'u' {
			return 0, false
		}
		v, err := strconv.ParseUint(s[i+2:i+6], 16, 16)
		if err != nil {
			return 0, false
		}
		return rune(v), true
	}

	src := string(_jsonRaw)
	var out strings.Builder
	out.Grow(len(src))

	for i := 0; i < len(src); {
		// Copy everything up to the next backslash in one go.
		j := strings.IndexByte(src[i:], '\\')
		if j < 0 {
			out.WriteString(src[i:])
			break
		}
		out.WriteString(src[i : i+j])
		i += j

		// A trailing backslash has nothing to escape; pass it through.
		if i+1 == len(src) {
			out.WriteByte('\\')
			break
		}
		// Any escape other than \u is copied as a unit. Consuming both bytes
		// matters for "\\": the second backslash must not be mistaken for
		// the start of a following \u escape.
		if src[i+1] != 'u' {
			out.WriteString(src[i : i+2])
			i += 2
			continue
		}

		r, ok := hex4(src, i)
		if !ok {
			return nil, fmt.Errorf("malformed \\u escape at offset %d: %w", i, strconv.ErrSyntax)
		}
		n := 6 // input bytes consumed by this escape
		if r >= highStart && r < lowStart {
			// Characters outside the BMP are written as two escapes.
			if lo, ok := hex4(src, i+6); ok && lo >= lowStart && lo < surrEnd {
				r = 0x10000 + (r-highStart)<<10 + (lo - lowStart)
				n = 12
			}
		}

		switch {
		case r < 0x20, r == '"', r == '\\', r >= highStart && r < surrEnd:
			// Writing these raw would yield invalid JSON (or invalid UTF-8
			// for a lone surrogate), so keep the original escape text.
			out.WriteString(src[i : i+n])
		default:
			out.WriteRune(r)
		}
		i += n
	}
	return json.RawMessage(out.String()), nil
}
// main demonstrates _UnescapeUnicodeCharactersInJSON by printing the same
// JSON document twice: first with its non-ASCII characters written as \uXXXX
// escapes, then with those escapes converted to UTF-8. Both forms are valid
// JSON.
func main() {
	// Every non-ASCII character is escaped here; for example \u263a is '☺'.
	escaped := json.RawMessage(`{"HelloWorld": "\uC548\uB155, \uC138\uC0C1(\u4E16\u4E0A). \u263a"}`)

	// Report a conversion failure instead of silently printing an empty line.
	unescaped, err := _UnescapeUnicodeCharactersInJSON(escaped)
	if err != nil {
		fmt.Println("unescape failed:", err)
		return
	}

	fmt.Println(string(escaped))   // {"HelloWorld": "\uC548\uB155, \uC138\uC0C1(\u4E16\u4E0A). \u263a"}
	fmt.Println(string(unescaped)) // {"HelloWorld": "안녕, 세상(世上). ☺"}
}
https://play.golang.org/p/pUsrzrrcDG-
Hope this helps :D