HtmlUnit 模拟浏览器抓取数据(含 AJAX)
import java.io.IOException; import java.net.MalformedURLException; import com.gargoylesoftware.htmlunit.BrowserVersion; import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException; import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController; import com.gargoylesoftware.htmlunit.SilentCssErrorHandler; import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.html.HtmlPage; public class WorldBankCrawl { public static void main(String[] args) throws FailingHttpStatusCodeException, MalformedURLException, IOException { WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24); webClient.setCssErrorHandler(new SilentCssErrorHandler()); webClient.setAjaxController(new NicelyResynchronizingAjaxController()); webClient.getOptions().setCssEnabled(true); webClient.getOptions().setRedirectEnabled(false); webClient.getOptions().setAppletEnabled(false); webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setPopupBlockerEnabled(true); webClient.getOptions().setTimeout(10000); HtmlPage page = webClient.getPage("http://huaban.com/favorite/home/"); System.out.println(page.asXml()); webClient.closeAllWindows(); } }
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。