HtmlUnit 使用
import com.gargoylesoftware.htmlunit.WebClient;import com.gargoylesoftware.htmlunit.html.HtmlPage;import com.gargoylesoftware.htmlunit.BrowserVersion;import com.gargoylesoftware.htmlunit.html.HtmlDivision;import com.gargoylesoftware.htmlunit.html.HtmlAnchor;import com.gargoylesoftware.htmlunit.*;import com.gargoylesoftware.htmlunit.WebClientOptions;import com.gargoylesoftware.htmlunit.html.HtmlInput;import com.gargoylesoftware.htmlunit.html.HtmlBody;import java.util.List;public class helloHtmlUnit{ public static void main(String[] args) throws Exception{ String str; //创建一个webclient WebClient webClient = new WebClient(); //htmlunit 对css和javascript的支持不好,所以请关闭之 webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); //获取页面 HtmlPage page = webClient.getPage("http://www.baidu.com/"); //获取页面的TITLE str = page.getTitleText(); System.out.println(str); //获取页面的XML代码 str = page.asXml(); System.out.println(str); //获取页面的文本 str = page.asText(); System.out.println(str); //关闭webclient webClient.closeAllWindows(); } }
3.2 使用不同版本的浏览器打开
import com.gargoylesoftware.htmlunit.WebClient;import com.gargoylesoftware.htmlunit.html.HtmlPage;import com.gargoylesoftware.htmlunit.BrowserVersion;import com.gargoylesoftware.htmlunit.html.HtmlDivision;import com.gargoylesoftware.htmlunit.html.HtmlAnchor;import com.gargoylesoftware.htmlunit.*;import com.gargoylesoftware.htmlunit.WebClientOptions;import com.gargoylesoftware.htmlunit.html.HtmlInput;import com.gargoylesoftware.htmlunit.html.HtmlBody;import java.util.List;public class helloHtmlUnit{ public static void main(String[] args) throws Exception{ String str; //使用FireFox读取网页 WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24); //htmlunit 对css和javascript的支持不好,所以请关闭之 webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); HtmlPage page = webClient.getPage("http://www.baidu.com/"); str = page.getTitleText(); System.out.println(str); //关闭webclient webClient.closeAllWindows(); } }
3.3 找到页面中特定的元素
public class helloHtmlUnit{ public static void main(String[] args) throws Exception{ //创建webclient WebClient webClient = new WebClient(BrowserVersion.CHROME); //htmlunit 对css和javascript的支持不好,所以请关闭之 webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); HtmlPage page = (HtmlPage)webClient.getPage("http://www.baidu.com/"); //通过id获得"百度一下"按钮 HtmlInput btn = (HtmlInput)page.getHtmlElementById("su"); System.out.println(btn.getDefaultValue()); //关闭webclient webClient.closeAllWindows(); } }
3.4 元素检索
public class helloHtmlUnit{ public static void main(String[] args) throws Exception{ //创建webclient WebClient webClient = new WebClient(BrowserVersion.CHROME); //htmlunit 对css和javascript的支持不好,所以请关闭之 webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); HtmlPage page = (HtmlPage)webClient.getPage("http://www.baidu.com/"); //查找所有div List<?> hbList = page.getByXPath("//div"); HtmlDivision hb = (HtmlDivision)hbList.get(0); System.out.println(hb.toString()); //查找并获取特定input List<?> inputList = page.getByXPath("//input[@id=‘su‘]"); //List links = (List) page.getByXPath ("//*[@id=\"groups_tab\"]/div[1]/ul/li[1]/a"); HtmlInput input = (HtmlInput)inputList.get(0); System.out.println(input.toString()); //关闭webclient webClient.closeAllWindows(); } }
3.5 提交搜索
public class helloHtmlUnit{ public static void main(String[] args) throws Exception{ //创建webclient WebClient webClient = new WebClient(BrowserVersion.CHROME); //htmlunit 对css和javascript的支持不好,所以请关闭之 webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); HtmlPage page = (HtmlPage)webClient.getPage("http://www.baidu.com/"); //获取搜索输入框并提交搜索内容 HtmlInput input = (HtmlInput)page.getHtmlElementById("kw"); System.out.println(input.toString()); input.setValueAttribute("雅蠛蝶"); System.out.println(input.toString()); //获取搜索按钮并点击 HtmlInput btn = (HtmlInput)page.getHtmlElementById("su"); HtmlPage page2 = btn.click(); //输出新页面的文本 System.out.println(page2.asText()); } }
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。