Jsoup+HttpClient获取新浪新闻数据
package com.test; import java.io.IOException; import java.net.URI; import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.utils.URIBuilder; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; /** * * 依赖 commons-httpclient-3.1.jar commons-codec-1.4.jar * * @author tianjun * */ public class PostTest { public static void main(String[] args) throws Exception { // (1)构造HttpClient的实例 CloseableHttpClient httpCLient = HttpClients.createDefault(); // 创建get请求实例 HttpGet httpget = new HttpGet() ; //设置参数 //http://roll.news.sina.com.cn/s/channel.php?ch=01#col=91&spec=&type=&ch=01&k=&offset_page=0&offset_num=0&num=60&asc=&page=NaN //http://roll.news.sina.com.cn/s/channel.php?col=91&spec=&type=&ch=01&offset_page=0&offset_num=0&num=60&page=1 URI uri = new URIBuilder() .setScheme("http") .setHost("roll.news.sina.com.cn") .setPath("/s/channel.php") .setParameter("ch", "01") .setParameter("col", "91") .setParameter("spec","") .setParameter("type", "") .setParameter("ch", "01") .setParameter("offset_page", "0") .setParameter("offset_num", "0") .setParameter("num", "60") .setParameter("page", "1") .build(); httpget.setURI(uri); //设置请求头信息 /* */ httpget.setHeader("Accep", "*/*"); httpget.setHeader("Accept-Encoding","gzip, deflate"); httpget.setHeader("Accept-Language","zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3"); httpget.setHeader("Connection","keep-alive"); httpget.setHeader("Host","roll.news.sina.com.cn"); httpget.setHeader("Referer","http://roll.news.sina.com.cn/s/channel.php?ch=01"); httpget.setHeader("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0"); httpget.setHeader("Content-Type","text/html;charset=UTF-8"); System.out.println("executing request "+httpget.getURI()); try { // 客户端执行get请求 返回响应实体 HttpResponse response = httpCLient.execute(httpget); // 服务器响应状态行 System.out.println(response.getStatusLine()); Header[] heads = response.getAllHeaders(); // 打印所有响应头 for(Header h:heads){ System.out.println(h.getName()+":"+h.getValue()); } // 获取响应消息实体 HttpEntity entity = response.getEntity(); System.out.println("------------------------------------"); if(entity != null){ //响应内容 System.out.println( new String(EntityUtils.toString(entity).getBytes("ISO-8859-1"),"gbk")); System.out.println("----------------------------------------"); // 响应内容长度 System.out.println("响应内容长度:"+entity.getContentLength()); } } catch (ClientProtocolException e){ e.printStackTrace(); } catch (IOException e){ e.printStackTrace(); }finally{ httpCLient.getConnectionManager().shutdown(); } } }
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。