package main
import (
"bufio"
"io"
"golang.org/x/net/html/charset"
"golang.org/x/text/encoding"
"net/http"
"fmt"
"golang.org/x/text/transform"
"io/ioutil"
)
// main
func main() {
resp, err := http.Get("http://www.baidu.com")
if err != nil {
panic(err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
fmt.Println("Error: status code", resp.StatusCode)
return
}
e := determineEncoding(resp.Body)
utf8Reader := transform.NewReader(resp.Body, e.NewDecoder())
all, err := ioutil.ReadAll(utf8Reader)
if err != nil {
panic(err)
}
fmt.Printf("%s
", all)
}
// determineEncoding guesses the character encoding of the content in r by
// peeking at up to its first 1024 bytes.
//
// r is wrapped with bufio.NewReader. When r is already a *bufio.Reader with at
// least the default buffer size, NewReader returns it unchanged, so the peek
// consumes nothing and the caller can keep reading from r. For any other
// reader the peeked bytes are pulled out of r into a private buffer and are no
// longer available from r; callers that need the full content should therefore
// pass a *bufio.Reader and continue reading from that same value.
//
// It panics if reading fails for any reason other than reaching the end of
// the input.
func determineEncoding(r io.Reader) encoding.Encoding {
	reader := bufio.NewReader(r)
	head, err := reader.Peek(1024)
	// Input shorter than 1024 bytes makes Peek return the bytes it has together
	// with io.EOF. That is not a failure: sniff whatever is available.
	if err != nil && err != io.EOF {
		panic(err)
	}
	// The canonical name and the "certain" flag are not needed here.
	e, _, _ := charset.DetermineEncoding(head, "")
	return e
}
The output is not correct: the decoded data does not start at the beginning of the response body.
As the documentation describes: 'Peek returns the next n bytes without advancing the reader. The bytes stop being valid at the next read call. If Peek returns fewer than n bytes, it also returns an error explaining why the read is short. The error is ErrBufferFull if n is larger than b's buffer size.'
Peek returns the next n bytes without advancing the reader.
Here "the reader" refers to the *bufio.Reader, not the underlying reader. The buffered reader still reads from the underlying reader to fill its buffer when necessary. How else would it return the bytes?
In your case, you have to stop using the response body directly after calling determineEncoding and use the *bufio.Reader instead.
For instance:
// determineEncoding (revised signature) takes the caller's *bufio.Reader
// instead of wrapping an io.Reader itself, so the bytes inspected by Peek stay
// buffered in r and are still returned by the caller's later reads from r.
// The rest of the body is elided here and is the same as in the original.
func determineEncoding(r *bufio.Reader) encoding.Encoding {
bytes, err := r.Peek(1024)
// as before
}
// main (revised) creates a single *bufio.Reader over the response body and
// uses that same reader for both encoding detection and decoding. The request
// setup and the final read/print are elided and are the same as in the
// original.
func main() {
// as before
defer resp.Body.Close()
// Wrap the body once; every later read goes through r, not resp.Body.
r := bufio.NewReader(resp.Body)
e := determineEncoding(r)
utf8Reader := transform.NewReader(r, e.NewDecoder())
// as before
}