在用go写淘宝的爬虫,用的selenium,但中途遇到滑块验证过不了,根据网上python的相关资料得知需修改下chromedriver,并设置开发者模式
chromedriver已经用Notepad++修改过了,但不知道怎么设置开发者模式。参考资料全都是python的,我又看不懂go的源码,所以想知道怎么用go设置开发者模式。各位如果知道更好的绕过验证的办法,还请告知,谢谢
import (
	"fmt"
	"time"

	"github.com/tebeka/selenium"
)
ops := []selenium.ServiceOption{}
service, _ := selenium.NewChromeDriverService(seleniumPath, port, ops...)
defer service.Stop()
caps := selenium.Capabilities{
"browserName": "chrome",
}
wd, _ := selenium.NewRemote(caps, "http://127.0.0.1:9515/wd/hub")
defer wd.Quit()
time.Sleep(time.Second * 1)
//这是设置参数的部分代码
在代码里执行这段js语句(selenium go的调用jsapi我不太熟悉)
# JavaScript to execute in the page: redefines navigator.webdriver with a
# getter that returns false.
js1 = "Object.defineProperties(navigator,{ webdriver:{ get: () => false } }) "
你可以使用chromedp:(chromedp是一个更快、更简单的Golang库,用于驱动支持Chrome DevTools协议的浏览器,并且不需要额外的依赖(例如Selenium和PhantomJS)。)
import github.com/chromedp/chromedp
func main() { cookies,err := XieQuCookies() if err!=nil { panic(err) } fmt.Println(cookies)}//获取cookiefunc XieQuCookies() (string, error) {
ua:="Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
options := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.Flag("headless", true), // debug使用
chromedp.Flag("blink-settings", "imagesEnabled=false"), //不显示图片
chromedp.UserAgent(ua), //自定义ua
)
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), options...)
defer cancel()
// create context
ctx, cancel := chromedp.NewContext(allocCtx,chromedp.WithLogf(log.Printf))
defer cancel()
// create a timeout
ctx, cancel = context.WithTimeout(ctx, 30*time.Second)
defer cancel()
//cookie
cookiesVal:=""
err := chromedp.Run(ctx,
//设置webdriver检测反爬
chromedp.ActionFunc(func(cxt context.Context) error {
_, err := page.AddScriptToEvaluateOnNewDocument("Object.defineProperty(navigator, 'webdriver', { get: () => false, });").Do(cxt)
return err
}),
//登录链接
chromedp.Navigate(`https://www.xiequ.cn/iplogin.aspx`),
//等待页面元素加载完成
chromedp.WaitVisible(`#nc_1_n1z`),
chromedp.Sleep(time.Second*1),
//账号
chromedp.SetValue(`input[name='LoginName']`, "username", chromedp.ByQuery),
//密码
chromedp.SetValue(`input[name='LoginPassword']`, "123456", chromedp.ByQuery),
//模拟滑动验证
chromedp.QueryAfter("#nc_1_n1z", func(fctx context.Context, id runtime.ExecutionContextID, node ...*cdp.Node) error {
n:=node[0]
return MouseDragNode(n, fctx)
}),
chromedp.Sleep(time.Millisecond*300),
//点击登录
chromedp.Click(`.content1-s3-p4`, chromedp.ByQuery),
chromedp.Sleep(time.Millisecond*300),
//获取cookie
chromedp.ActionFunc(func(cctx context.Context) error {
for i:=0; i<5; i++ {
cookes,err:=network.GetAllCookies().Do(cctx)
if err!=nil {
return err
}
var cookieStr bytes.Buffer
for _, v := range cookes {
cookieStr.WriteString(v.Name + "=" + v.Value + ";")
}
cookiesVal = cookieStr.String()
if strings.Contains(cookiesVal,"acw_tc") {
break
}
time.Sleep(time.Millisecond*500)
}
return nil
}),
)
return cookiesVal, err
}
滑动验证
//模拟滑动func MouseDragNode(n *cdp.Node, cxt context.Context) error {
boxes, err := dom.GetContentQuads().WithNodeID(n.NodeID).Do(cxt)
if err!=nil {
return err
}
if len(boxes) == 0 {
return chromedp.ErrInvalidDimensions
}
content := boxes[0]
c := len(content)
if c%2 != 0 || c < 1 {
return chromedp.ErrInvalidDimensions
}
var x, y float64
for i := 0; i < c; i += 2 {
x += content[i]
y += content[i+1]
}
x /= float64(c / 2)
y /= float64(c / 2)
p := &input.DispatchMouseEventParams{
Type: input.MousePressed,
X: x,
Y: y,
Button: input.Left,
ClickCount: 1,
}
// 鼠标左键按下
if err := p.Do(cxt); err != nil {
return err
}
// 拖动
p.Type = input.MouseMoved
max := 380.0
for {
if p.X > max {
break
}
rt := rand.Intn(20) + 20
chromedp.Run(cxt, chromedp.Sleep(time.Millisecond * time.Duration(rt)))
x := rand.Intn(2) + 15
y := rand.Intn(2)
p.X = p.X + float64(x)
p.Y = p.Y + float64(y)
//fmt.Println("X坐标:",p.X)
if err := p.Do(cxt); err != nil {
return err
}
}
// 鼠标松开
p.Type = input.MouseReleased
return p.Do(cxt)
}
补充:
这个库github.com/chromedp/chromedp的说明请参考:https://blog.csdn.net/yes169yes123/article/details/109562220
通过Capabilities的Args这个参数设置
// Rebuild the capabilities and attach Chrome-specific options to them.
caps = selenium.Capabilities{"browserName": "chrome"}

// No browser preferences are set; the map is intentionally empty.
chromePrefs := map[string]interface{}{}

// Command-line switches passed to Chrome.
chromeArgs := []string{
	// Run Chrome in headless mode.
	"--headless",
	"--no-sandbox",
	// Spoof the user agent to evade anti-crawler checks.
	"--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/604.4.7 (KHTML, like Gecko) Version/11.0.2 Safari/604.4.7",
}

caps.AddChrome(chrome.Capabilities{
	Prefs: chromePrefs,
	Path:  "",
	Args:  chromeArgs,
})
具体参数参照
https://www.cnblogs.com/xuchunlin/p/16375517.html
淘宝能识别浏览器驱动,所以用selenium同样没用