public class TestClient {
/**
 * Program entry point: delegates straight to {@code run()}.
 *
 * @param args command-line arguments (not used)
 * @throws Exception whatever {@code run()} propagates
 */
public static void main(String[] args) throws Exception {
    run();
}
/**
 * Downloads comment pages 1 and 2 from the comment-list endpoint, prints the
 * HTTP status, the raw response body and each comment's info, then passes the
 * page's comments to {@code writeCsv} and calls {@code readCsv} (both are
 * defined outside this block).
 *
 * <p>An {@link IOException} raised while fetching or reading one page is
 * printed and the loop moves on to the next page.
 *
 * @throws Exception if closing the HTTP client fails, or if {@code writeCsv} /
 *     {@code readCsv} propagate a non-I/O failure
 */
private static void run() throws Exception {
    // Request header that disguises the crawler as a regular browser.
    final String userAgent = "Mozilla/5.0(Windows NT 6.1)AppleWebKit/537.36"
            + "(KHTML,like Gecko)Chrome/74.0.3729.169 Safari/537.36";

    // NOTE(review): the proxy was previously configured only AFTER the request
    // had been executed, so it never took effect. It is now applied before
    // execute(); confirm this hard-coded proxy address is still reachable.
    HttpHost proxy = new HttpHost("112.85.168.223", 9999);
    RequestConfig config = RequestConfig.custom().setProxy(proxy).build();

    // try-with-resources releases the client's connection pool on exit.
    try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
        for (int i = 1; i <= 2; i++) {
            // The page number is spliced into the URL so each iteration
            // requests a different page instead of fetching page 1 twice.
            HttpGet httpGet = new HttpGet("https://web-drcn.hispace.dbankcloud.cn/uowap/index?method="
                    + "internal.user.commenList3&serviceType=20&reqPageNum=" + i
                    + "&maxResults=25&appid=C10428146&version=10.0.0&zone=&locale=zh");

            // Header and proxy must be set before the request is sent.
            httpGet.setHeader("User-Agent", userAgent);
            httpGet.setConfig(config);

            try (CloseableHttpResponse httpResponse = httpClient.execute(httpGet)) {
                HttpEntity entity = httpResponse.getEntity();
                int statusCode = httpResponse.getStatusLine().getStatusCode();
                System.out.println(statusCode);

                if (entity == null) {
                    // No response body to parse for this page.
                    continue;
                }

                // Decode explicitly as UTF-8 rather than the platform default
                // so non-ASCII comment text is not garbled.
                StringBuilder body = new StringBuilder();
                try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                        entity.getContent(), java.nio.charset.StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        body.append(line);
                    }
                }

                String json = body.toString();
                System.out.println(json);

                CommentResponseBean commentResponseBean = JSON.parseObject(json, CommentResponseBean.class);
                System.out.println(1);

                if (commentResponseBean != null) {
                    List<CommentResponseBean.ListDTO> comments = commentResponseBean.getList();
                    if (comments == null) {
                        // Response carried no comment list; nothing to print or export.
                        continue;
                    }
                    for (CommentResponseBean.ListDTO comment : comments) {
                        System.out.println(comment.getCommentInfo());
                    }
                    writeCsv(comments);
                    System.out.println("__________________________________");
                    readCsv();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
可以用字符串拼接网址，使每次请求传递的 url 不同，从而实现翻页的效果（一般翻页网页的网址都是有规律可循的，仔细观察就能发现翻页时网址的变化，例如本例中只有 reqPageNum 参数随页码变化）。