读取网页内容不再出现乱码
有没有发现，每次读取网页内容时都要先去查网页的编码类型？这次整理出了一个通用的方法，下次读取网页内容时
就不会再出现乱码了。
package org.httpclient;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.ArrayUtils;

import com.auto.generate.log.LogManager;

/**
 * Small HTTP GET helper that decodes the response body with the charset
 * declared in the response's {@code Content-Type} header and carries cookies
 * from one request to the next.
 *
 * <p>Instances hold mutable state ({@link #cookies}, {@link #userAgent} and the
 * last detected charset) and perform no synchronization.
 */
public class HttpClient {

    /** Charset used when the response declares none, or one this JVM cannot decode. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Matches the {@code charset} parameter of a Content-Type value, quoted or not. */
    private static final Pattern CHARSET_PARAM =
            Pattern.compile("charset\\s*=\\s*[\"']?([^\\s;\"']+)", Pattern.CASE_INSENSITIVE);

    /** Charset name used to decode the most recent response. */
    private String charset;

    /** Cookies sent with each request; extended with cookies received in responses. */
    public Cookie[] cookies;

    /** Value sent in the {@code User-agent} request header. */
    public String userAgent = "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36";

    /**
     * Line separator appended after every line of the returned page text.
     */
    public static final String LINE_SEPARATOR = System.getProperty("line.separator");

    /**
     * Fetches the page at {@code url} and returns its text.
     *
     * <p>The body is read line by line and each line is followed by
     * {@link #LINE_SEPARATOR}. Cookies announced by the response are merged
     * into {@link #cookies} after the body has been read.
     *
     * <p>Failures are not propagated: any exception is passed to
     * {@code LogManager.err} and whatever text was read so far (possibly the
     * empty string) is returned.
     *
     * @param url absolute HTTP(S) URL to request
     * @return the decoded page text, never {@code null}
     */
    public String execute(String url) {
        StringBuilder body = new StringBuilder();
        HttpURLConnection conn = null;
        BufferedReader reader = null;
        try {
            conn = (HttpURLConnection) new URL(url).openConnection();
            conn.setRequestProperty("User-agent", userAgent);

            // Only send a Cookie header when there is something to send.
            String cookieHeader = buildCookieHeader();
            if (cookieHeader.length() > 0) {
                conn.addRequestProperty("Cookie", cookieHeader);
            }
            conn.connect();

            this.charset = resolveCharset(conn.getContentType());

            InputStream is = conn.getInputStream();
            reader = new BufferedReader(new InputStreamReader(is, charset));
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line).append(LINE_SEPARATOR);
            }

            storeResponseCookies(conn.getHeaderFields());
        } catch (Exception e) {
            LogManager.err("http请求错误", e);
        } finally {
            // Release the stream and the connection on every path, including failures.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
            if (conn != null) {
                conn.disconnect();
            }
        }
        return body.toString();
    }

    /**
     * Joins the current cookies into a {@code name=value; name=value} string.
     *
     * @return the Cookie header value, or an empty string when there are no cookies
     */
    private String buildCookieHeader() {
        StringBuilder header = new StringBuilder();
        if (cookies == null) {
            return header.toString();
        }
        for (Cookie cookie : cookies) {
            if (cookie == null) {
                continue;
            }
            if (header.length() > 0) {
                header.append("; ");
            }
            header.append(cookie.getName()).append("=").append(cookie.getValue());
        }
        return header.toString();
    }

    /**
     * Extracts a usable charset name from a Content-Type header value.
     *
     * @param contentType raw header value, may be {@code null}
     * @return the declared charset when this JVM supports it, otherwise
     *         {@link #DEFAULT_CHARSET}
     */
    private static String resolveCharset(String contentType) {
        if (contentType != null) {
            Matcher matcher = CHARSET_PARAM.matcher(contentType);
            if (matcher.find()) {
                String declared = matcher.group(1);
                try {
                    if (Charset.isSupported(declared)) {
                        return declared;
                    }
                } catch (IllegalArgumentException ignored) {
                    // Syntactically illegal charset name: fall back to the default.
                }
            }
        }
        return DEFAULT_CHARSET;
    }

    /**
     * Feeds every {@code Set-Cookie} response header to {@link #splitCookies(String)}.
     *
     * @param headers response headers as returned by the connection, may be {@code null}
     */
    private void storeResponseCookies(Map<String, List<String>> headers) {
        if (headers == null) {
            return;
        }
        for (Map.Entry<String, List<String>> header : headers.entrySet()) {
            // The map may contain a null key; equalsIgnoreCase(null) is simply false.
            if (!"Set-Cookie".equalsIgnoreCase(header.getKey()) || header.getValue() == null) {
                continue;
            }
            for (String value : header.getValue()) {
                splitCookies(value);
            }
        }
    }

    /**
     * Parses one {@code Set-Cookie} header value and merges the cookie into
     * {@link #cookies}.
     *
     * <p>Only the leading {@code name=value} pair is a cookie; everything after
     * the first {@code ';'} is an attribute (Path, Domain, ...) and is ignored.
     * A cookie whose name is already present replaces the old entry instead of
     * being appended a second time. The existing array is never modified in
     * place; a new array is assigned.
     *
     * @param cookie raw header value, may be {@code null}
     */
    private void splitCookies(String cookie) {
        if (cookie == null) {
            return;
        }
        int attributesStart = cookie.indexOf(';');
        String pair = attributesStart < 0 ? cookie : cookie.substring(0, attributesStart);

        // Split on the first '=' only, so values that contain '=' survive.
        int equals = pair.indexOf('=');
        if (equals <= 0) {
            return;
        }
        String name = pair.substring(0, equals).trim();
        String value = pair.substring(equals + 1).trim();
        if (name.length() == 0) {
            return;
        }

        Cookie parsed = new Cookie(name, value);
        if (cookies != null) {
            for (int i = 0; i < cookies.length; i++) {
                if (cookies[i] != null && name.equals(cookies[i].getName())) {
                    Cookie[] updated = cookies.clone();
                    updated[i] = parsed;
                    cookies = updated;
                    return;
                }
            }
        }
        cookies = ArrayUtils.add(cookies, parsed);
    }

    /**
     * Returns the charset name used to decode the most recent response.
     *
     * @return the charset name, or {@code null} if no response has been decoded yet
     */
    public String getCharset() {
        return charset;
    }

    /**
     * Returns the cookies currently held by this client.
     *
     * @return the cookie array, possibly {@code null}
     */
    public Cookie[] getCookies() {
        return cookies;
    }

    /**
     * Sets the User-Agent header value; {@code null} or blank input is ignored.
     *
     * @param userAgent new User-Agent string
     */
    public void setUserAgent(String userAgent) {
        if (null == userAgent || "".equals(userAgent.trim())) {
            return;
        }
        this.userAgent = userAgent;
    }

    /**
     * Replaces the cookies sent with subsequent requests.
     *
     * @param cookies cookies to send, may be {@code null}
     */
    public void setCookies(Cookie[] cookies) {
        this.cookies = cookies;
    }
}
Cookie 类：
package org.httpclient; public class Cookie { private String name ; private String value ; public Cookie(String name, String value) { this.name = name; this.value = value; } public String getName() { return name; } public void setName(String name) { this.name = name; } public String getValue() { return value; } public void setValue(String value) { this.value = value; } }
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。