httpClient如何接收格式错误的响应头部信息
Exception in thread "main" org.apache.commons.httpclient.ProtocolException: Unable to parse header: share memory not exist, need create new share memory! at org.apache.commons.httpclient.HttpParser.parseHeaders(HttpParser.java:202) at org.apache.commons.httpclient.HttpMethodBase.readResponseHeaders(HttpMethodBase.java:1935) at org.apache.commons.httpclient.HttpMethodBase.readResponse(HttpMethodBase.java:1737) at org.apache.commons.httpclient.HttpMethodBase.execute(HttpMethodBase.java:1098) at org.apache.commons.httpclient.HttpMethodDirector.executeWithRetry(HttpMethodDirector.java:398) at org.apache.commons.httpclient.HttpMethodDirector.executeMethod(HttpMethodDirector.java:171) at org.apache.commons.httpclient.HttpClient.executeMethod(HttpClient.java:397) at org.apache.commons.httpclient.HttpClient.executeMethod(HttpClient.java:323)
做网页爬虫的时候, 模拟Get请求访问网址, 出现了上面的错误异常。 问了3个前辈, 都说没遇到过, 这可头疼死了。
主要是不知道问题出在哪里, 有人说可能是buffer的问题, 把大小设置大一点试试。花了一个晚上查网上资料, 终于有点眉目, 见参考网址 : http://bbs.csdn.net/topics/390178589 。参考帖子里给出的是基于HttpClient 4.x(org.apache.http)的做法, 代码如下:
/** * */ package com.http; import java.io.IOException; import org.apache.http.Header; import org.apache.http.HttpException; import org.apache.http.HttpResponse; import org.apache.http.HttpResponseFactory; import org.apache.http.HttpVersion; import org.apache.http.conn.ClientConnectionOperator; import org.apache.http.conn.OperatedClientConnection; import org.apache.http.conn.scheme.SchemeRegistry; import org.apache.http.impl.conn.BasicClientConnectionManager; import org.apache.http.impl.conn.DefaultClientConnection; import org.apache.http.impl.conn.DefaultClientConnectionOperator; import org.apache.http.impl.conn.DefaultHttpResponseParser; import org.apache.http.io.HttpMessageParser; import org.apache.http.io.SessionInputBuffer; import org.apache.http.message.BasicHeader; import org.apache.http.message.BasicHttpResponse; import org.apache.http.message.BasicLineParser; import org.apache.http.message.BasicStatusLine; import org.apache.http.message.LineParser; import org.apache.http.params.HttpParams; import org.apache.http.util.CharArrayBuffer; /** * @author yingzi * */ public class MyBasicClientConnectionManager extends BasicClientConnectionManager { public MyBasicClientConnectionManager() { super(); } @Override protected ClientConnectionOperator createConnectionOperator( final SchemeRegistry sr) { return new MyClientConnectionOperator(sr); } class MyClientConnection extends DefaultClientConnection { @Override protected HttpMessageParser createResponseParser( final SessionInputBuffer buffer, final HttpResponseFactory responseFactory, final HttpParams params) { return new MyDefaultHttpResponseParser(buffer, new MyLineParser(), responseFactory, params); } } class MyDefaultHttpResponseParser extends DefaultHttpResponseParser { public MyDefaultHttpResponseParser(SessionInputBuffer buffer, LineParser parser, HttpResponseFactory responseFactory, HttpParams params) { super(buffer, parser, responseFactory, params); } @Override protected HttpResponse parseHead( final 
SessionInputBuffer sessionBuffer) throws IOException, HttpException { try { return super.parseHead(sessionBuffer); } catch (Exception ex) { // 压制ParseException异常 return new BasicHttpResponse(new BasicStatusLine(HttpVersion.HTTP_1_1, 200, "")); } } } class MyClientConnectionOperator extends DefaultClientConnectionOperator { public MyClientConnectionOperator(final SchemeRegistry sr) { super(sr); } @Override public OperatedClientConnection createConnection() { return new MyClientConnection(); } } class MyLineParser extends BasicLineParser { @Override public Header parseHeader(final CharArrayBuffer buffer) { try { return super.parseHeader(buffer); } catch (Exception ex) { // 压制ParseException异常 return new BasicHeader("invalid", buffer.toString()); } } } }
而我用的是MultiThreadedHttpConnectionManager(commons-httpclient 3.x), 上面的做法不怎么适用。于是我看了日志里的异常堆栈(trace), 查看了httpClient具体的execute方法里的代码, 猜想了一下, 觉得要把原来的GetMethod换成下面这个自定义的子类:
/** * */ package com.http; import java.io.IOException; import org.apache.commons.httpclient.Header; import org.apache.commons.httpclient.HttpConnection; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpParser; import org.apache.commons.httpclient.HttpState; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * @author yingzi * */ public class MyHttpGetMethod extends org.apache.commons.httpclient.methods.GetMethod { private static final Logger log = LoggerFactory.getLogger( MyHttpGetMethod.class ); public MyHttpGetMethod(){ super(); } public MyHttpGetMethod(String url){ super(url); } @Override protected void readResponseHeaders(HttpState state, HttpConnection conn) throws IOException, HttpException { getResponseHeaderGroup().clear(); Header[] headers = {new Header("Connection","Keep-Alive"), new Header("Content-Type","text/html; charset=GB18030"), new Header("Keep-Alive","timeout=20"), new Header("Cache-control","max-age=3600")}; try { headers = HttpParser.parseHeaders( conn.getResponseInputStream(), getParams().getHttpElementCharset()); } catch (Exception ex) { // 压制ParseException异常 log.warn("response header has some error info , can not parse normally."); } // Wire logging moved to HttpParser getResponseHeaderGroup().setHeaders(headers); } }
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。