我用
// Request the page with Jsoup; get() returns the parsed Document
Document doc=Jsoup.connect("http://www.baidu.com").get();
// Print the Document (uses its toString() output)
System.out.println(doc);
打印出来的内容并不是网页完整的 HTML,但是却可以通过 doc.getElementById("ul") 获取里面的标签值。
这是怎么回事呢??
然后我用
// Convert the URL string into a URL object
URL realUrl = new URL(url);
URLConnection connection = realUrl.openConnection();
connection.connect();
// Wrap the response stream in a reader that decodes the bytes as UTF-8
in = new BufferedReader(new InputStreamReader(connection.getInputStream(),"utf-8"));
String line;
// Read the response line by line until the end of the stream
while ((line = in.readLine()) != null) {
// readLine() strips the line terminator, so result contains no line breaks
result += line;
}
这种方式可以获取到网页所有的html代码。
哪位大神能给小弟指点下怎么回事?谢谢啦。
已经解决了,是因为网站有反爬机制的。。。
弄成网站可以看一下,另外可以试着加密,因为我也是小白
package temp;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import org.apache.http.HttpEntity;
import org.apache.http.ParseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jsoup.nodes.Document;
/**
 * Small demo that downloads a web page with Apache HttpClient and prints its
 * HTML to standard output.
 */
public class Temp {

    /** Page requested by {@link #main(String[])}. */
    private static final String TARGET_URL = "http://www.baidu.com";

    /**
     * Sends a GET request to {@code TARGET_URL} and prints the response body.
     *
     * <p>Nothing is printed when the response carries no entity. I/O and
     * protocol failures are reported as a stack trace and do not propagate.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // try-with-resources closes the response first and then the client,
        // even when reading the body fails, so the connection is always released.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
                CloseableHttpResponse response = httpclient.execute(new HttpGet(TARGET_URL))) {
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                System.out.println(readBody(entity));
            }
        } catch (IOException | ParseException e) {
            // ClientProtocolException is an IOException, so it is handled here too.
            e.printStackTrace();
        }
    }

    /**
     * Reads the whole entity content as UTF-8 text.
     *
     * <p>Lines are concatenated without separators because
     * {@link BufferedReader#readLine()} strips line terminators; the returned
     * text therefore contains no line breaks.
     *
     * @param entity the response entity to read; must not be {@code null}
     * @return the entity content with line terminators removed
     * @throws IOException if the content stream cannot be opened or read
     */
    private static String readBody(HttpEntity entity) throws IOException {
        // StringBuilder avoids re-copying the accumulated text on every line.
        StringBuilder body = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line);
            }
        }
        return body.toString();
    }
}