@Configuration
public class SpiderConfig {

    /** Location of the chromedriver executable handed to Selenium via a system property. */
    private static final String CHROME_DRIVER_PATH =
            "D:\\peng_YuJun\\JavaWeb\\tools\\Spider\\chromedriver\\chromedriver.exe";

    /** URL fragment that identifies the requests whose responses are printed. */
    private static final String HISTORY_ACTION = "visitor!getHistory.action";

    /** Maximum number of seconds to wait for the chart element after each query. */
    private static final int CHART_WAIT_SECONDS = 10;

    private static final By SENSOR_SELECT = By.xpath("//*[@id=\"sensor_select\"]");
    private static final By SENSOR_LIST = By.xpath("//*[@id=\"sensor_select\"]/ul[2]");
    private static final By QUERY_BUTTON = By.xpath("//*[@id=\"queryBtn\"]");
    private static final By HISTORY_CHART = By.xpath("//*[@id=\"history_echart\"]");

    /**
     * Main routine: opens the page through a recording proxy, runs a query for every
     * sensor entry found in the sensor drop-down, then prints the recorded responses
     * of the history requests.
     *
     * <p>The browser and the proxy are always shut down before this method returns,
     * including when an exception is thrown part-way through.
     */
    public void spiderMain() {
        // Network proxy used to observe requests and capture their responses.
        BrowserMobProxy browserMobProxy = getBrowserMobProxy();
        try {
            Proxy seleniumProxy = ClientUtil.createSeleniumProxy(browserMobProxy);
            // Browser automation driver, routed through the proxy.
            WebDriver driver = getDriver(seleniumProxy);
            try {
                // Open the page.
                driver.get("网页链接");
                queryEachSensor(driver);
                // Read the HAR while the browser is still open, as the original flow did.
                printHistoryResponses(browserMobProxy.getHar());
            } finally {
                // Without quit() the Chrome and chromedriver processes outlive this call.
                driver.quit();
            }
        } finally {
            // Releases the port and threads held by the embedded proxy server.
            browserMobProxy.stop();
        }
    }

    /**
     * Selects each sensor entry in turn, clicks the query button and waits for the
     * chart element to be visible.
     *
     * @param driver driver with the target page already loaded
     */
    private void queryEachSensor(WebDriver driver) {
        driver.findElement(SENSOR_SELECT).click();
        List<WebElement> sensorsEl =
                driver.findElement(SENSOR_LIST).findElements(By.className("node"));
        // NOTE(review): these elements are located once up front; if the page re-renders
        // the list after a query they could go stale - confirm against the real page.
        for (WebElement sensorEl : sensorsEl) {
            // Choose the sensor.
            driver.findElement(SENSOR_SELECT).click();
            sensorEl.click();
            // Run the query.
            driver.findElement(QUERY_BUTTON).click();
            // Wait for the line chart (i.e. for the requested data to come back).
            // NOTE(review): presumably the chart stays visible after the first query, in
            // which case this wait would not block on later iterations - confirm.
            new WebDriverWait(driver, CHART_WAIT_SECONDS)
                    .until(ExpectedConditions.visibilityOfElementLocated(HISTORY_CHART));
        }
    }

    /**
     * Prints URL, response status and response body of every recorded entry whose
     * request URL contains {@link #HISTORY_ACTION}.
     *
     * @param har the HAR recorded by the proxy
     */
    private void printHistoryResponses(Har har) {
        // The HAR log holds every request/response pair seen by the proxy.
        for (HarEntry entry : har.getLog().getEntries()) {
            String url = entry.getRequest().getUrl();
            if (!url.contains(HISTORY_ACTION)) {
                continue;
            }
            System.out.println("Request URL: " + url);
            System.out.println("Entry response status: " + entry.getResponse().getStatus());
            System.out.println("Entry response text: " + entry.getResponse().getContent().getText());
        }
    }

    /**
     * Creates a Chrome driver that sends its traffic through the given proxy.
     *
     * @param seleniumProxy proxy settings to apply to the browser
     * @return a newly started Chrome driver; the caller is responsible for quitting it
     */
    public WebDriver getDriver(Proxy seleniumProxy) {
        System.setProperty("webdriver.chrome.driver", CHROME_DRIVER_PATH);
        ChromeOptions chromeOptions = new ChromeOptions();
        chromeOptions.setProxy(seleniumProxy);
        // Reduce the chance of the site detecting that the browser is automated.
        chromeOptions.setExperimentalOption("excludeSwitches", Collections.singletonList("enable-automation"));
        chromeOptions.setExperimentalOption("useAutomationExtension", false);
        return new ChromeDriver(chromeOptions);
    }

    /**
     * Starts a BrowserMob proxy with a fresh HAR that records response content.
     *
     * @return the started proxy; the caller is responsible for stopping it
     */
    public BrowserMobProxy getBrowserMobProxy() {
        BrowserMobProxy browserMobProxy = new BrowserMobProxyServer();
        browserMobProxy.start();
        // A preceding enableHarCaptureTypes(REQUEST_CONTENT, RESPONSE_CONTENT) call was
        // dropped: setHarCaptureTypes replaces the whole set, so it had no lasting effect.
        browserMobProxy.setHarCaptureTypes(CaptureType.RESPONSE_CONTENT);
        browserMobProxy.newHar("pyj"); // arbitrary initial page reference for the HAR
        return browserMobProxy;
    }
}
不要紧。看了一下日志,googleapis.com/144.251.43.10:443 是谷歌的一个在线服务地址,在国内访问不到很正常,可以忽略。如果需要在代码中处理,可以使用 try-catch 捕获该异常并进行自定义处理,这样就可以规避了。