Jsoup+HttpClient获取新浪新闻数据

package com.test;

import java.io.IOException;  
import java.net.URI;  

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse; 
import org.apache.http.client.ClientProtocolException; 
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder; 
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
  
 
 

/**
 * Fetches one page of the Sina rolling-news channel listing over HTTP and prints the
 * status line, every response header and the decoded response body to standard output.
 *
 * <p>Requires Apache HttpComponents HttpClient 4.3 or later: {@code CloseableHttpClient}
 * and {@code HttpClients} live in the {@code org.apache.http} packages, which the legacy
 * {@code commons-httpclient-3.1.jar} does not provide.
 *
 * @author tianjun
 */
public class PostTest {

    /**
     * Charset used to decode the body when the response does not declare one in its
     * {@code Content-Type} header.
     */
    private static final String FALLBACK_CHARSET = "GBK";

    /**
     * Builds the listing request, executes it and dumps the response to standard output.
     *
     * <p>I/O and protocol failures during the exchange are reported on standard error via
     * their stack trace and do not propagate; a malformed request URI does propagate.
     *
     * @param args ignored
     * @throws Exception if the request URI cannot be built
     */
    public static void main(String[] args) throws Exception {
        // Resulting URL:
        // http://roll.news.sina.com.cn/s/channel.php?col=91&spec=&type=&ch=01&offset_page=0&offset_num=0&num=60&page=1
        // Each parameter is set exactly once; setParameter() replaces earlier values of the
        // same name, so setting "ch" twice would only obscure the final parameter order.
        URI uri = new URIBuilder()
                .setScheme("http")
                .setHost("roll.news.sina.com.cn")
                .setPath("/s/channel.php")
                .setParameter("col", "91")
                .setParameter("spec", "")
                .setParameter("type", "")
                .setParameter("ch", "01")
                .setParameter("offset_page", "0")
                .setParameter("offset_num", "0")
                .setParameter("num", "60")
                .setParameter("page", "1")
                .build();

        HttpGet httpGet = new HttpGet(uri);

        // Browser-like request headers.
        httpGet.setHeader("Accept", "*/*");
        httpGet.setHeader("Accept-Encoding", "gzip, deflate");
        httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
        httpGet.setHeader("Connection", "keep-alive");
        httpGet.setHeader("Host", "roll.news.sina.com.cn");
        httpGet.setHeader("Referer", "http://roll.news.sina.com.cn/s/channel.php?ch=01");
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0");
        httpGet.setHeader("Content-Type", "text/html;charset=UTF-8");

        System.out.println("executing request " + httpGet.getURI());

        // The client is created only once the request is fully built, and closing it
        // (try-with-resources) releases the underlying connections even on failure.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            HttpResponse response = httpClient.execute(httpGet);

            // Status line of the server response.
            System.out.println(response.getStatusLine());

            // Dump every response header.
            for (Header header : response.getAllHeaders()) {
                System.out.println(header.getName() + ":" + header.getValue());
            }

            HttpEntity entity = response.getEntity();

            System.out.println("------------------------------------");

            if (entity != null) {
                // Decode with the charset declared by the response, falling back to GBK
                // when none is declared. Re-encoding an already decoded string through
                // ISO-8859-1 would corrupt the text whenever a charset IS declared.
                System.out.println(EntityUtils.toString(entity, FALLBACK_CHARSET));

                System.out.println("----------------------------------------");
                // Length reported by the entity; negative when the length is unknown.
                System.out.println("响应内容长度:" + entity.getContentLength());
            }
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。