This code gets the page data through a proxy. It start from id of page = 1 and continue till 100000
package main
import (
"code.google.com/p/gcfg"
"database/sql"
"flag"
"fmt"
_ "github.com/go-sql-driver/mysql"
"github.com/parnurzeal/gorequest"
"log"
"strconv"
"time"
)
func main() {
t := time.Now()
initConfig()
initRoundRobin()
for i := vStart; i <= vStop; i++ {
cache("https://site.ru/url/"+strconv.Itoa(i), "url", i)
}
fmt.Println("Time: ", time.Since(t))
}
var (
db *sql.DB
config ConfigStruct
configFile = "config.gcfg"
roundRobin map[int]string
roundRobinNextId int
roundRobinMaxId int
vStart int
vStop int
vPart int
vScriptCount int
roundRobinShift int
failedRequestCount int
successRequestCount int
)
type ConfigStruct struct {
Database struct {
ConnectionString string
TableNameCache string
}
Part struct {
CurrentPart int
CurrentProxyCheckerPart int
}
}
type ProxyAddress struct {
Id int
Ip string
}
func check(e error) {
if e != nil {
panic(e)
log.Fatal(e)
}
}
Init connection to the database where stored proxy list and store all pages data.
func initConfig() {
if err := gcfg.ReadFileInto(&config, configFile); err != nil {
check(err)
}
connect, err := sql.Open("mysql", config.Database.ConnectionString)
check(err)
db = connect
vScriptCount = 100000
flag.IntVar(&roundRobinShift, "shift", 0, "")
flag.IntVar(&vPart, "part", 0, "select part")
flag.IntVar(&vStart, "start", (vPart*vScriptCount)+1, "select start id")
flag.IntVar(&vStop, "stop", (vPart+1)*vScriptCount, "select stop id")
flag.Parse()
vStart = vStart + (vPart * vScriptCount)
vStop = vStop + (vPart * vScriptCount)
successRequestCount = 0
}
Init list of proxies
func initRoundRobin() {
roundRobin = make(map[int]string)
rows, err := db.Query("SELECT id, ip FROM proxies WHERE status = 1 order by id asc")
check(err)
defer rows.Close()
for rows.Next() {
pa := new(ProxyAddress)
if err := rows.Scan(&pa.Id, &pa.Ip); err != nil {
check(err)
}
roundRobin[pa.Id] = pa.Ip
roundRobinMaxId = pa.Id
}
roundRobinNextId = roundRobinShift + successRequestCount
if roundRobinNextId > roundRobinMaxId {
roundRobinNextId = 0
}
failedRequestCount = 0
}
Function check data from database and if it is not in it - script will do a request
func cache(url string, tag string, url_id int) ([]byte, error) {
var data string
err := db.QueryRow("SELECT data FROM cache WHERE url_id=? AND tag=?", url_id, tag).Scan(&data)
if err == sql.ErrNoRows {
data, err := proxyRequest(url)
check(err)
insert, err := db.Prepare("INSERT cache SET url_id=?, url=?, tag=?, data=?, datetime=?")
datetime, err := strconv.ParseInt(time.Now().Format("20060102150405"), 10, 64)
check(err)
insert.Exec(url_id, url, tag, data, datetime)
return []byte(data), nil
}
return []byte(data), nil
}
Function make request through proxy to get page data. Each request get through new proxy server. Timeout per server = 10 sec. If server don't send a response - server will be deleted from roundrobin.
func proxyRequest(url string) ([]byte, error) {
for {
roundRobinCurrentId := getNextRoundRobinId()
proxy := roundRobin[roundRobinCurrentId]
request := gorequest.New().Timeout(10000 * time.Millisecond).Proxy("http://" + proxy)
_, body, err := request.Get(url).End()
if err != nil {
deleteRoundRobinId(roundRobinCurrentId, err[0].Error())
continue
} else {
successRequestCount++
}
return []byte(body), nil
break
}
return []byte(""), nil
}
Get next proxy server from list. If list is ended - script will wait for 30 seconds.
func getNextRoundRobinId() int {
for {
if len(roundRobin) == 0 {
initRoundRobin()
if len(roundRobin) == 0 {
fmt.Println("proxy is ended")
time.Sleep(30 * time.Second)
initRoundRobin()
}
}
if roundRobinNextId == roundRobinMaxId {
roundRobinNextId = 0
}
roundRobinNextId++
_, ok := roundRobin[roundRobinNextId]
if ok {
break
}
}
return roundRobinNextId
}
If response from proxy was bad - server will be deleted by this function. If count of servers get 5 - list of proxy will be reinit.
func deleteRoundRobinId(roundRobinId int, result string) {
db.Exec("update proxies set status = 0, response =? where id=?", result, roundRobinId)
delete(roundRobin, roundRobinId)
failedRequestCount++
if failedRequestCount == 5 {
initRoundRobin()
}
}
I run this script on 10 terminals. Some time later most of them crashed with this error:
database/sql.(*Stmt).Exec(0x0, 0xc20842be38, 0x5, 0x5, 0x0, 0x0, 0x0, 0x0)
/usr/local/go/src/database/sql/sql.go:1302 +0x3e1
main.cache(0xc2082f0690, 0x23, 0x355910, 0x9, 0x12775b, 0x0, 0x0, 0x0, 0x0, 0x0)
/var/go/src/parser/index.go:148 +0x702
main.main()
/var/go/src/parser/index.go:31 +0x26c
goroutine 5 [chan receive, 213 minutes]:
database/sql.(*DB).connectionOpener(0xc2080446e0)
/usr/local/go/src/database/sql/sql.go:589 +0x4c
created by database/sql.Open
/usr/local/go/src/database/sql/sql.go:452 +0x31c
goroutine 17 [syscall, 213 minutes, locked to thread]:
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:2232 +0x1
goroutine 10854 [IO wait]:
net.(*pollDesc).Wait(0xc2081f2990, 0x72, 0x0, 0x0)
/usr/local/go/src/net/fd_poll_runtime.go:84 +0x47
net.(*pollDesc).WaitRead(0xc2081f2990, 0x0, 0x0)
/usr/local/go/src/net/fd_poll_runtime.go:89 +0x43
net.(*netFD).Read(0xc2081f2930, 0xc20844e800, 0x400, 0x400, 0x0, 0x584b70, 0xc2081f7ae8)
/usr/local/go/src/net/fd_unix.go:242 +0x40f
net.(*conn).Read(0xc208038390, 0xc20844e800, 0x400, 0x400, 0x0, 0x0, 0x0)
/usr/local/go/src/net/net.go:121 +0xdc
crypto/tls.(*block).readFromUntil(0xc20857dfb0, 0x5860a8, 0xc208038390, 0x5, 0x0, 0x0)
/usr/local/go/src/crypto/tls/conn.go:454 +0xe6
crypto/tls.(*Conn).readRecord(0xc208484b00, 0x17, 0x0, 0x0)
/usr/local/go/src/crypto/tls/conn.go:539 +0x2da
crypto/tls.(*Conn).Read(0xc208484b00, 0xc20834e000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
/usr/local/go/src/crypto/tls/conn.go:904 +0x166
net/http.noteEOFReader.Read(0x58c488, 0xc208484b00, 0xc208263398, 0xc20834e000, 0x1000, 0x1000, 0x2a0300, 0x0, 0x0)
/usr/local/go/src/net/http/transport.go:1270 +0x6e
net/http.(*noteEOFReader).Read(0xc2082a4460, 0xc20834e000, 0x1000, 0x1000, 0xc208012000, 0x0, 0x0)
<autogenerated>:125 +0xd4
bufio.(*Reader).fill(0xc2080fc060)
/usr/local/go/src/bufio/bufio.go:97 +0x1ce
bufio.(*Reader).Peek(0xc2080fc060, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0)
/usr/local/go/src/bufio/bufio.go:132 +0xf0
net/http.(*persistConn).readLoop(0xc208263340)
/usr/local/go/src/net/http/transport.go:842 +0xa4
created by net/http.(*Transport).dialConn
/usr/local/go/src/net/http/transport.go:660 +0xc9f
goroutine 10863 [select]:
net/http.(*persistConn).writeLoop(0xc208263ad0)
/usr/local/go/src/net/http/transport.go:945 +0x41d
created by net/http.(*Transport).dialConn
/usr/local/go/src/net/http/transport.go:661 +0xcbc
goroutine 10855 [select]:
net/http.(*persistConn).writeLoop(0xc208263340)
/usr/local/go/src/net/http/transport.go:945 +0x41d
created by net/http.(*Transport).dialConn
/usr/local/go/src/net/http/transport.go:661 +0xcbc
goroutine 10862 [IO wait]:
net.(*pollDesc).Wait(0xc208010b50, 0x72, 0x0, 0x0)
/usr/local/go/src/net/fd_poll_runtime.go:84 +0x47
net.(*pollDesc).WaitRead(0xc208010b50, 0x0, 0x0)
/usr/local/go/src/net/fd_poll_runtime.go:89 +0x43
net.(*netFD).Read(0xc208010af0, 0xc20852c800, 0x400, 0x400, 0x0, 0x584b70, 0xc2081af168)
/usr/local/go/src/net/fd_unix.go:242 +0x40f
net.(*conn).Read(0xc208038488, 0xc20852c800, 0x400, 0x400, 0x0, 0x0, 0x0)
/usr/local/go/src/net/net.go:121 +0xdc
crypto/tls.(*block).readFromUntil(0xc2085597a0, 0x5860a8, 0xc208038488, 0x5, 0x0, 0x0)
/usr/local/go/src/crypto/tls/conn.go:454 +0xe6
crypto/tls.(*Conn).readRecord(0xc2084858c0, 0x17, 0x0, 0x0)
/usr/local/go/src/crypto/tls/conn.go:539 +0x2da
crypto/tls.(*Conn).Read(0xc2084858c0, 0xc208415000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
/usr/local/go/src/crypto/tls/conn.go:904 +0x166
net/http.noteEOFReader.Read(0x58c488, 0xc2084858c0, 0xc208263b28, 0xc208415000, 0x1000, 0x1000, 0x2a0300, 0x0, 0x0)
/usr/local/go/src/net/http/transport.go:1270 +0x6e
net/http.(*noteEOFReader).Read(0xc20825bce0, 0xc208415000, 0x1000, 0x1000, 0xc208012000, 0x0, 0x0)
<autogenerated>:125 +0xd4
bufio.(*Reader).fill(0xc2080fd740)
/usr/local/go/src/bufio/bufio.go:97 +0x1ce
bufio.(*Reader).Peek(0xc2080fd740, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0)
/usr/local/go/src/bufio/bufio.go:132 +0xf0
net/http.(*persistConn).readLoop(0xc208263ad0)
/usr/local/go/src/net/http/transport.go:842 +0xa4
created by net/http.(*Transport).dialConn
/usr/local/go/src/net/http/transport.go:660 +0xc9f
goroutine 10867 [select]:
net/http.(*persistConn).writeLoop(0xc208263ef0)
/usr/local/go/src/net/http/transport.go:945 +0x41d
created by net/http.(*Transport).dialConn
/usr/local/go/src/net/http/transport.go:661 +0xcbc
goroutine 10866 [IO wait]:
net.(*pollDesc).Wait(0xc2084be1b0, 0x72, 0x0, 0x0)
/usr/local/go/src/net/fd_poll_runtime.go:84 +0x47
net.(*pollDesc).WaitRead(0xc2084be1b0, 0x0, 0x0)
/usr/local/go/src/net/fd_poll_runtime.go:89 +0x43
net.(*netFD).Read(0xc2084be150, 0xc208465000, 0x1000, 0x1000, 0x0, 0x584b70, 0xc2081602f0)
/usr/local/go/src/net/fd_unix.go:242 +0x40f
net.(*conn).Read(0xc208038540, 0xc208465000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
/usr/local/go/src/net/net.go:121 +0xdc
crypto/tls.(*block).readFromUntil(0xc2080cc180, 0x5860a8, 0xc208038540, 0x5, 0x0, 0x0)
/usr/local/go/src/crypto/tls/conn.go:454 +0xe6
crypto/tls.(*Conn).readRecord(0xc2082cc000, 0x17, 0x0, 0x0)
/usr/local/go/src/crypto/tls/conn.go:539 +0x2da
crypto/tls.(*Conn).Read(0xc2082cc000, 0xc2082cf000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
/usr/local/go/src/crypto/tls/conn.go:904 +0x166
net/http.noteEOFReader.Read(0x58c488, 0xc2082cc000, 0xc208263f48, 0xc2082cf000, 0x1000, 0x1000, 0x2a0300, 0x0, 0x0)
/usr/local/go/src/net/http/transport.go:1270 +0x6e
net/http.(*noteEOFReader).Read(0xc20820ed60, 0xc2082cf000, 0x1000, 0x1000, 0xc208012000, 0x0, 0x0)
<autogenerated>:125 +0xd4
bufio.(*Reader).fill(0xc208326240)
/usr/local/go/src/bufio/bufio.go:97 +0x1ce
bufio.(*Reader).Peek(0xc208326240, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0)
/usr/local/go/src/bufio/bufio.go:132 +0xf0
net/http.(*persistConn).readLoop(0xc208263ef0)
/usr/local/go/src/net/http/transport.go:842 +0xa4
created by net/http.(*Transport).dialConn
/usr/local/go/src/net/http/transport.go:660 +0xc9f
goroutine 10859 [select]:
net/http.(*persistConn).writeLoop(0xc208263760)
/usr/local/go/src/net/http/transport.go:945 +0x41d
created by net/http.(*Transport).dialConn
/usr/local/go/src/net/http/transport.go:661 +0xcbc
goroutine 10858 [IO wait]:
net.(*pollDesc).Wait(0xc2081f3790, 0x72, 0x0, 0x0)
/usr/local/go/src/net/fd_poll_runtime.go:84 +0x47
net.(*pollDesc).WaitRead(0xc2081f3790, 0x0, 0x0)
/usr/local/go/src/net/fd_poll_runtime.go:89 +0x43
net.(*netFD).Read(0xc2081f3730, 0xc208418000, 0x1000, 0x1000, 0x0, 0x584b70, 0xc2081c53e8)
/usr/local/go/src/net/fd_unix.go:242 +0x40f
net.(*conn).Read(0xc208038400, 0xc208418000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
/usr/local/go/src/net/net.go:121 +0xdc
crypto/tls.(*block).readFromUntil(0xc208458510, 0x5860a8, 0xc208038400, 0x5, 0x0, 0x0)
/usr/local/go/src/crypto/tls/conn.go:454 +0xe6
crypto/tls.(*Conn).readRecord(0xc208485340, 0x17, 0x0, 0x0)
/usr/local/go/src/crypto/tls/conn.go:539 +0x2da
crypto/tls.(*Conn).Read(0xc208485340, 0xc2082f6000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
/usr/local/go/src/crypto/tls/conn.go:904 +0x166
net/http.noteEOFReader.Read(0x58c488, 0xc208485340, 0xc2082637b8, 0xc2082f6000, 0x1000, 0x1000, 0x2a0300, 0x0, 0x0)
/usr/local/go/src/net/http/transport.go:1270 +0x6e
net/http.(*noteEOFReader).Read(0xc20827a1a0, 0xc2082f6000, 0x1000, 0x1000, 0xc208012000, 0x0, 0x0)
<autogenerated>:125 +0xd4
bufio.(*Reader).fill(0xc2080fcc00)
/usr/local/go/src/bufio/bufio.go:97 +0x1ce
bufio.(*Reader).Peek(0xc2080fcc00, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0)
/usr/local/go/src/bufio/bufio.go:132 +0xf0
net/http.(*persistConn).readLoop(0xc208263760)
/usr/local/go/src/net/http/transport.go:842 +0xa4
created by net/http.(*Transport).dialConn
/usr/local/go/src/net/http/transport.go:660 +0xc9f
exit status 2
What it can be and how I can fix it?
Thank you!