I want to scrape a list of URLs using gocolly.
package main

import (
    "fmt"
    "io/ioutil"
    "log"
    "os"
    "strings"

    "github.com/gocolly/colly"
    "github.com/gocolly/colly/queue"
)

func main() {
    fileName := "output.txt"
    var result string
    f, err := os.Create(fileName)
    if err != nil {
        panic(err)
    }
    defer func() {
        if err := f.Close(); err != nil {
            panic(err)
        }
    }()
    rows := ReadInput()
    q := AddUrl(rows)
    // Instantiate default collector
    c := colly.NewCollector()
    c.OnHTML("body", func(e *colly.HTMLElement) {
        result = result + e.Text + "\n"
    })
    c.OnRequest(func(r *colly.Request) {
        fmt.Println("visiting", r.URL)
    })
    // Set error handler
    c.OnError(func(r *colly.Response, err error) {
        fmt.Println("Request URL:", r.Request.URL, "failed with response:", r, "\nError:", err)
    })
    q.Run(c)
    f.WriteString(result)
    log.Printf("Scraping done, please check file %q for results\n", fileName)
}
func ReadInput() []string {
    // Read from file
    b, err := ioutil.ReadFile("input.txt") // just pass the file name
    if err != nil {
        fmt.Print(err)
    }
    str := string(b) // convert content to a 'string'
    // split into one row per line
    rows := strings.Split(str, "\n")
    return rows
}
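In case the file contents matter, here is a stricter variant of the reader I could try — a minimal sketch, assuming input.txt holds one URL per line. It trims surrounding whitespace from each row (including a trailing "\r" that Windows CRLF line endings would leave behind after splitting on "\n") and skips blank lines. ReadInputTrimmed is a hypothetical name, not part of my program.

// ReadInputTrimmed is a hypothetical, stricter variant of ReadInput:
// it trims surrounding whitespace (including "\r" from CRLF line
// endings) from each line and drops empty lines, so every returned
// row is a clean URL candidate.
func ReadInputTrimmed() []string {
    b, err := ioutil.ReadFile("input.txt")
    if err != nil {
        fmt.Print(err)
    }
    var rows []string
    for _, line := range strings.Split(string(b), "\n") {
        line = strings.TrimSpace(line) // removes "\r", spaces, tabs
        if line != "" {
            rows = append(rows, line)
        }
    }
    return rows
}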
But when I try to add the URLs from the slice of strings to the gocolly queue, not all of them are added; only the last URL gets visited (see the diagnostic sketch after AddUrl below).
func AddUrl(rows []string) *queue.Queue {
    Q, _ := queue.New(
        2, // Number of consumer threads
        &queue.InMemoryQueueStorage{MaxSize: 10000},
    )
    for _, url := range rows {
        Q.AddURL(url)
    }
    return Q
}
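To make failures visible, here is a minimal diagnostic sketch of the same function: Queue.AddURL returns an error (for example, for an unparsable URL) and Queue.Size reports how many requests are stored, so this version logs both instead of discarding them. AddUrlChecked is a hypothetical name used for illustration.

// AddUrlChecked is a hypothetical variant of AddUrl that surfaces
// failures instead of discarding them: it logs the error AddURL
// returns for each row and then reports the resulting queue size.
func AddUrlChecked(rows []string) *queue.Queue {
    Q, err := queue.New(
        2, // Number of consumer threads
        &queue.InMemoryQueueStorage{MaxSize: 10000},
    )
    if err != nil {
        panic(err)
    }
    for _, url := range rows {
        if err := Q.AddURL(url); err != nil {
            log.Printf("could not enqueue %q: %v", url, err)
        }
    }
    if size, err := Q.Size(); err == nil {
        log.Printf("queue now holds %d requests", size)
    }
    return Q
}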
If I add the URLs manually instead of in a loop, it works perfectly, but with the loop only the last element is added.
func AddUrl(rows []string) *queue.Queue {
    Q, _ := queue.New(
        2, // Number of consumer threads
        &queue.InMemoryQueueStorage{MaxSize: 10000},
    )
    Q.AddURL("http://bakeshopva.com")
    Q.AddURL("http://zekescoffeedc.com")
    return Q
}
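One difference between the two versions that may be relevant: the hard-coded URLs cannot contain stray whitespace or carriage returns, while rows read from a file can, which is what the trimming and error-logging sketches above are meant to rule out.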