使用fastjson解析json抓取新浪新闻文章
首先看两个简单的 fastjson 使用示例。
例子一
package ivyy.taobao.com.domain.json; import java.util.Iterator; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; /** * @Author:jilongliang * @Date:2014-12-19 * @Version:1.0 * @Description: */ public class JsonTest1 { public static void main(String[] args) { //[{"age":22,"sex":"男","userName":"xiaoliang"},{"age":22,"sex":"男","userName":"xiaoliang"}] StringBuffer buff=new StringBuffer(); buff.append("["); buff.append("{"); buff.append("‘age‘").append(":").append("22").append(","); buff.append("‘sex‘").append(":").append("‘男‘").append(","); buff.append("‘userName‘").append(":").append("‘周伯通‘").append(""); buff.append("}"); buff.append(",");//第一个数组结尾 buff.append("{"); buff.append("‘age‘").append(":").append("22").append(","); buff.append("‘sex‘").append(":").append("‘男‘").append(","); buff.append("‘userName‘").append(":").append("‘令狐冲‘").append(""); buff.append("}"); buff.append("]"); String jsonStr=buff.toString(); JSONArray jarr=JSONArray.parseArray(jsonStr);//JSON.parseArray(jsonStr); for (Iterator iterator = jarr.iterator(); iterator.hasNext();) { JSONObject job=(JSONObject)iterator.next(); String age=job.get("age").toString(); System.out.println(age); } } }
例子二
package ivyy.taobao.com.domain.json;

import ivyy.taobao.com.entity.Classz;
import ivyy.taobao.com.entity.Student;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

/**
 * fastjson round-trip example: serializes a {@link Classz} holding one
 * {@link Student} to JSON text, parses that text back both as a generic
 * {@link JSONObject} and as a typed {@link Classz}, and prints the results.
 *
 * @author jilongliang
 * @version 1.0 (2014-12-19)
 */
public class JsonTest2 {

    public static void main(String[] args) {
        Student stu1 = new Student();
        stu1.setAge(22);
        stu1.setUserName("xiaoliang");
        stu1.setSex("男");

        Classz claz1 = new Classz();
        claz1.getStudents().add(stu1);

        String jsonStr = JSON.toJSONString(claz1);

        // Generic parse. The parser is a static method, so it is called on the
        // class directly instead of through a throwaway JSONObject instance.
        JSONObject obj = JSON.parseObject(jsonStr);
        System.out.println(obj);

        // Typed parse back into the entity class.
        Classz clz = JSON.parseObject(jsonStr, Classz.class);
        Student st = clz.getStudents().get(0);
        System.out.println(st.getSex());
    }
}
例子一和例子二用到的实体类
package ivyy.taobao.com.entity;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

/**
 * A class (group) of students; a plain serializable bean exposing one list property.
 *
 * @author liangjl
 * @version 1.0 (2014-12-19)
 */
public class Classz implements Serializable {

    /** Students belonging to this class; starts out as an empty, mutable list. */
    private List<Student> students;

    /** Creates a class with no students. */
    public Classz() {
        this.students = new ArrayList<Student>();
    }

    /**
     * @return the backing student list itself (not a copy), so callers may add to it
     */
    public List<Student> getStudents() {
        return this.students;
    }

    /**
     * Replaces the backing student list.
     *
     * @param students the new list, stored as-is
     */
    public void setStudents(List<Student> students) {
        this.students = students;
    }
}
?
package ivyy.taobao.com.entity; import java.io.Serializable; /** *@Author:liangjl *@Date:2014-12-19 *@Version:1.0 *@Description: */ public class Student implements Serializable{ private Integer age; private String sex; private String userName; public Integer getAge() { return age; } public void setAge(Integer age) { this.age = age; } public String getSex() { return sex; } public void setSex(String sex) { this.sex = sex; } public String getUserName() { return userName; } public void setUserName(String userName) { this.userName = userName; } }
?
例子三
package ivyy.taobao.com.domain.json; import ivyy.taobao.com.utils.GlobalConstants; import ivyy.taobao.com.utils.HttpRequestUtils; import java.util.Iterator; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; /** *@Author:liangjilong *@Date:2015-1-4 *@Email:[email protected] *@Version:1.0 *@Description这个是通过fastjson处理的 */ public class SinaNew { public static void main(String[] args) throws Exception { String requestURL = GlobalConstants.getUrl(2, "json"); String jsonText = HttpRequestUtils.HttpURLConnRequest(requestURL, "GET"); //System.out.println(jsonText); // 处理页面的json数据 int start = jsonText.indexOf("(") + 1; jsonText = jsonText.substring(start, jsonText.lastIndexOf(")")); String result = ""; JSONObject jsonObject = JSONObject.parseObject(jsonText); result = jsonObject.get("result").toString(); JSONObject resObj = JSONObject.parseObject(result); // String encoding=resObj.get("encoding").toString();//获取到编码 String dataStr = resObj.get("data").toString(); JSONArray dataArr = JSONArray.parseArray(dataStr); String title = "", url = "", keywords = "", img = "", media_name = ""; int i=0; for (Iterator iterator = dataArr.iterator(); iterator.hasNext();) { JSONObject object = (JSONObject) iterator.next(); title = object.get("title").toString();// title url = object.get("url").toString();// url keywords = object.get("keywords").toString();// keywords img = object.get("img").toString();// img media_name = object.get("media_name").toString();// media_name String newsText=GlobalConstants.getNewsContent(url);//处理新闻内容 //System.out.println("==================第"+i+"篇=================="+newsText); i++; System.out.println(title + "\n" + url + "\n" + keywords + "\n"+ url + "\n" + media_name); } } }
?
package ivyy.taobao.com.utils; import java.net.URL; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; /** *@Author:liangjilong *@Date:2015-1-4 *@Email:[email protected] *@Version:1.0 *@Description */ public class GlobalConstants { /*** * 获取url连接 * @param page第几页 * @param format格式(XML、JSON) * @return */ public static String getUrl(Integer page,String format){ StringBuffer buffer=new StringBuffer("http://api.roll.news.sina.com.cn/zt_list?channel=news"); String url=""; buffer.append("&cat_1=shxw");//显示新闻 buffer.append("&cat_2==zqsk||=qwys||=shwx||=fz-shyf"); buffer.append("&level==1||=2");//级别 buffer.append("&show_ext=1"); buffer.append("&show_all=1");//显示所有 buffer.append("&show_num=22");//显示多少条 buffer.append("&tag=1"); buffer.append("&format="+format); buffer.append("&page="+page); buffer.append("&callback=newsloader"); url=buffer.toString(); return url; } /*** * 获取文章的内容 * 从新浪的网页分析,通过文章body的id就可以拿到相应的文章内容.. * @param url * @return */ public static String getNewsContent(String url) throws Exception{ Document doc=Jsoup.parse(new URL(url), 3000); if(doc!=null){ String artibody=doc.getElementById("artibody").html();//通过网页的html的id去拿到新闻内容artibody return artibody; }else{ return "网络异常"; } } }
源代码:http://download.csdn.net/detail/jilongliang/8324543
package ivyy.taobao.com.utils; import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.URL; /** *@Author:liangjilong *@Date:2015-1-4 *@Email:[email protected] *@Version:1.0 *@Description */ public class HttpRequestUtils { /** * 发送http请求 * POST和GET请求都可以 * @param requestUrl 请求地址 * @param method传入的执行的方式 是GET还是POST方式 * @return String */ public static String HttpURLConnRequest(String requestUrl,String method) { StringBuffer buffer = new StringBuffer(); try { URL url = new URL(requestUrl); HttpURLConnection httpUrlConn = (HttpURLConnection) url.openConnection(); httpUrlConn.setDoInput(true); httpUrlConn.setRequestMethod(method); httpUrlConn.setUseCaches(false); httpUrlConn.setInstanceFollowRedirects(true); //重定向 httpUrlConn.connect(); // 将返回的输入流转换成字符串 InputStream inputStream = httpUrlConn.getInputStream(); InputStreamReader inputStreamReader = new InputStreamReader(inputStream, "utf-8"); BufferedReader bufferedReader = new BufferedReader(inputStreamReader); String str = null; while ((str = bufferedReader.readLine()) != null) { buffer.append(str); } bufferedReader.close(); inputStreamReader.close(); // 释放资源 inputStream.close(); inputStream = null; httpUrlConn.disconnect(); } catch (Exception e) { e.printStackTrace(); } return buffer.toString(); } }
?
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。