package p8;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
public class Search {
public static String getHtml(String urlString) {
try {
StringBuffer html = new StringBuffer();
URL url = new URL(urlString);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
InputStreamReader isr = new InputStreamReader(conn.getInputStream());
BufferedReader br = new BufferedReader(isr);
;
String temp;
while ((temp = br.readLine()) != null) {
html.append(temp).append("\n");
}
br.close();
isr.close();
return html.toString();
} catch (Exception e) {
e.printStackTrace();
return null;
}
}
public static void main(String[] args) {
System.out.println(Search.getHtml("https://www.hao123.com/"));
}
}
最后的结果中中文都是乱码,怎么解决?
设置工作空间字符集编码为utf-8就可以了。
详细为 点击 window --> Preferences --> General --> Workspace 。
看看https://www.hao123.com/的编码,
html.append(temp).append("\n");加上相应的编码转换一下
测试了了一下代码,并没有出现中文乱码,试一下在对java文件 右键----Propertites--Resource中看一下文件的编码格式
选择工程项目-----右击选择properties--------resource选项中将Other中改成UTF-8
html.append(new String(temp.toString,"utf-8"))