我在网上查了查,好像应该在次请求一下返回的location,但我不知道该怎么改代码.麻烦高手帮我看看.没多少分了
下面是我的代码,我请求的是一个测姻缘的网站..
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.InetAddress;
import java.net.Socket;
import java.net.URLEncoder;
public class GetYinYuan {
public static String getResult(){
StringBuffer sb=new StringBuffer();
String boy="张三";
String girl="李四";
try {
String data="boy="+URLEncoder.encode(boy,"UTF-8")+"&girl="+URLEncoder.encode(girl, "UTF-8");
String hostname="www.guabu.com";
int port=80;
InetAddress iaddr=InetAddress.getByName(hostname);
Socket socket=new Socket(iaddr,port);
String path="/yinyuan/yuanfen.asp?"+data;
BufferedWriter bw=new BufferedWriter(new OutputStreamWriter(socket.getOutputStream(),"GBK"));
bw.write("GET "+path+" HTTP/1.0\r\n");
bw.write("\r\n");
bw.write(data);
bw.flush();
BufferedReader br=new BufferedReader(new InputStreamReader(socket.getInputStream(),"UTF-8"));
String line;
boolean flag=false;
while((line=br.readLine())!=null){
System.out.println(line);
}
bw.close();
br.close();
} catch (Exception e) {
e.printStackTrace();
}
return sb.toString();
}
public static void main(String[] args) {
getResult();
}
}
下面是返回的代码
HTTP/1.1 301 Moved Permanently
Content-Length: 148
Content-Type: text/html
Location: http://www.huochepiao.com
Server: Microsoft-IIS/6.0
X-Powered-By: ASP.NET
Date: Mon, 04 Apr 2011 15:50:52 GMT
Connection: close
好吧: 66626830
分析返回的代码,然后提取地址,再次访问,扒下来
import java.net.*;
import java.io.*;
public class URLSender {
/** * @param args */
public static void main(String[] args) throws IOException
{
Socket socket = null;
try {
socket=new Socket("www.guabu.com",80);
OutputStream out=socket.getOutputStream();
BufferedReader in = new BufferedReader(new InputStreamReader(socket.getInputStream()));
ByteArrayOutputStream baos=new ByteArrayOutputStream();
//baos.write("data=abc\r\n".getBytes());
//System.out.println("Data Lenth:"+baos.toByteArray().length);
byte[] bytes = ("GET /yinyuan/yuanfen.asp?boy=李涛&girl=李进 HTTP/1.1\n" +
"Host:www.guabu.com\n" +
"Connection:close\n" +
// "User-agent:Mozilla/4.0\n" +
"Accept-language:zh-cn\n" +
"\n").getBytes("GBK");
out.write(bytes);
out.flush();
//read the response
boolean loop = true;
StringBuffer sb = new StringBuffer(8096);
while (loop) {
if (in.ready()) {
int i = 0;
while (i != -1) {
i = in.read();
sb.append((char) i);
}
loop = false;
}
//Thread.currentThread().sleep(50);
}
//display the response to the out console
System.out.println(sb.toString());
socket.close();
} catch (UnknownHostException e) {
System.err.println("Don't know about host: Victest.");
System.exit(1);
} catch (IOException e) {
System.err.println("Couldn't get I/O for " + "the connection to: Victest.");
System.exit(1);
}
}
}
上述代码即可实现,使用http watch即可知道服务器使用编码为GB2312,所以你的编码需要修改
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.InetAddress;
import java.net.Socket;
import java.net.URLEncoder;
public class GetYinYuan {
public static String getResult() {
StringBuffer sb = new StringBuffer();
String boy = "张三";
String girl = "李四";
try {
String data = "boy=" + boy + "&girl="
+ girl;
String hostname = "www.guabu.com";
int port = 80;
InetAddress iaddr = InetAddress.getByName(hostname);
Socket socket = new Socket(iaddr, port);
String path = "/yinyuan/yuanfen.asp?" + data;
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
socket.getOutputStream(), "GBK"));
bw.write("GET " + path + " HTTP/1.1\n");
bw.write("Host:www.guabu.com\n");
bw.write("Connection:close\n");
bw.write("Accept-language:zh-cn\n");
bw.write("Accept-language:zh-cn\n");
bw.write("\n");
// bw.write(data);
bw.flush();
BufferedReader br = new BufferedReader(new InputStreamReader(socket
.getInputStream(), "GBK"));
String line;
boolean flag = false;
while ((line = br.readLine()) != null) {
System.out.println(line);
}
bw.close();
br.close();
} catch (Exception e) {
e.printStackTrace();
}
return sb.toString();
}
public static void main(String[] args) {
getResult();
}
}
Accept-Language头字段用于指定客户机期望服务器返回哪个国家语言的文档
HTTP/1.0为什么改成1.1? 你用httpwatch 知道服务器协议版本为1.1
为什么要再次加这句bw.write("Host:www.guabu.com\n"); ?
你请求是那个主机
这局又是什么意思bw.write("Connection:close\n");?
表明你请求时短连接,避免长连接的请求
又为什么写两遍?bw.write("Accept-language:zh-cn\n");
写一句就可以, 笔误