Java 读取网页源代码
package com.sphere.service;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

/**
 * Downloads the source of a web page over HTTP and prints it to standard output.
 */
public class QueryService {

    /**
     * Issues an HTTP GET request and returns the response body as text.
     *
     * <p>The response bytes are decoded as gb2312. The body is read line by line with
     * {@link BufferedReader#readLine()}, which strips line terminators, and the lines are
     * appended back to back, so the returned string contains no line breaks.
     *
     * @param requestUrl the URL to fetch
     * @return the response body with all lines concatenated
     * @throws IOException if the URL is malformed, the connection cannot be opened, or
     *         reading the response fails
     */
    private static String httpRequest(String requestUrl) throws IOException {
        // Open the connection.
        URL url = new URL(requestUrl);
        HttpURLConnection httpUrlConn = (HttpURLConnection) url.openConnection();
        try {
            httpUrlConn.setDoInput(true);
            httpUrlConn.setRequestMethod("GET");

            // http://www.bjsubway.com/support/swzl/ is served in gb2312 encoding.
            // try-with-resources closes the reader chain in reverse order even when
            // reading (or an earlier close) fails.
            try (InputStream inputStream = httpUrlConn.getInputStream();
                 InputStreamReader inputStreamReader = new InputStreamReader(inputStream, "gb2312");
                 BufferedReader bufferedReader = new BufferedReader(inputStreamReader)) {

                // Accumulate the response; StringBuilder suffices for a method-local buffer.
                StringBuilder buffer = new StringBuilder();
                String line;
                while ((line = bufferedReader.readLine()) != null) {
                    buffer.append(line);
                }
                return buffer.toString();
            }
        } finally {
            // Always release the HTTP connection, including on failure paths.
            httpUrlConn.disconnect();
        }
    }

    /**
     * Fetches a fixed page and prints its source to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        String url = "http://www.bjsubway.com/support/swzl/";
        String htmlString = httpRequest(url);
        System.out.println(htmlString);
    }
}
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。