htmlparser取得一段html代码里面所有的链接地址和链接名称
package parser;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.HtmlPage;
/**
*htmlparser取得一段html代码里面所有的链接地址和链接名称
*
*@author chenguoyong
*
*/
public class Testhtmlparser {
/**
* @param args
*/
publicstatic void main(String[] args) {
Stringhtmlcode ="<HTML><HEAD><TITLE>AAA</TITLE></HEAD><BODY>"
+"<a href=‘http://topic.csdn.net/u/20080522/14/0ff402ef-c382-499a-8213-ba6b2f550425.html‘>连接1</a>"
+"<a href=‘http://topic.csdn.net‘>连接2</a></BODY></HTML>";
//创建Parser对象根据传给字符串和指定的编码
Parserparser = Parser.createParser(htmlcode, "GBK");
//创建HtmlPage对象HtmlPage(Parser parser)
HtmlPagepage = new HtmlPage(parser);
try{
//HtmlPage extends visitor,Apply the given visitor to the current
//page.
parser.visitAllNodesWith(page);
}catch (ParserException e1) {
e1= null;
}
//所有的节点
NodeListnodelist = page.getBody();
//建立一个节点filter用于过滤节点
NodeFilterfilter = new TagNameFilter("A");
//得到所有过滤后,想要的节点
nodelist= nodelist.extractAllNodesThatMatch(filter, true);
for(int i = 0; i < nodelist.size(); i++) {
LinkTaglink = (LinkTag) nodelist.elementAt(i);
//链接地址
System.out.println(link.getAttribute("href")+ "\n");
//链接名称
System.out.println(link.getStringText());
}
}
}
结果如下:
http://c.tieba.baidu.com/p/3381916727
http://c.tieba.baidu.com/p/3381917973
http://c.tieba.baidu.com/p/3381918306
http://c.tieba.baidu.com/p/3381918814
http://c.tieba.baidu.com/p/3381919238
http://c.tieba.baidu.com/p/3381919608
http://c.tieba.baidu.com/p/3381920024
http://c.tieba.baidu.com/p/3381920446
http://c.tieba.baidu.com/p/3381920900
http://c.tieba.baidu.com/p/3381921324
http://c.tieba.baidu.com/p/3381921731
http://c.tieba.baidu.com/p/3381922148
http://c.tieba.baidu.com/p/3381922597
http://c.tieba.baidu.com/p/3381923026
http://c.tieba.baidu.com/p/3381923447
http://c.tieba.baidu.com/p/3381923897
http://c.tieba.baidu.com/p/3381923897
http://c.tieba.baidu.com/p/3381293393
http://c.tieba.baidu.com/p/3381301014
http://c.tieba.baidu.com/p/3381326422
http://c.tieba.baidu.com/p/3381332114
http://c.tieba.baidu.com/p/3381311298
http://c.tieba.baidu.com/p/3381337437
http://c.tieba.baidu.com/p/3381342004
http://c.tieba.baidu.com/p/3381347422
http://c.tieba.baidu.com/p/3381351472
http://c.tieba.baidu.com/p/3381357838
http://c.tieba.baidu.com/p/3381378752
http://c.tieba.baidu.com/p/3381391030
http://c.tieba.baidu.com/p/3381394942
http://c.tieba.baidu.com/p/3381424809
http://c.tieba.baidu.com/p/3381435155
http://c.tieba.baidu.com/p/3381451017
http://c.tieba.baidu.com/p/3381456969
http://c.tieba.baidu.com/p/3381461871
http://c.tieba.baidu.com/p/3381466962
http://c.tieba.baidu.com/p/3381472111
http://c.tieba.baidu.com/p/3381477242
http://c.tieba.baidu.com/p/3381482375
http://c.tieba.baidu.com/p/3381487672
http://c.tieba.baidu.com/p/3381491805
http://c.tieba.baidu.com/p/3381509966
http://c.tieba.baidu.com/p/3381519038
http://c.tieba.baidu.com/p/3381543551
http://c.tieba.baidu.com/p/3381557232
http://c.tieba.baidu.com/p/3381562078
http://c.tieba.baidu.com/p/3381566855
http://c.tieba.baidu.com/p/3381575174
http://c.tieba.baidu.com/p/3381579869
http://c.tieba.baidu.com/p/3381586941
http://c.tieba.baidu.com/p/3381603387
http://c.tieba.baidu.com/p/3381610949
http://c.tieba.baidu.com/p/3381623805
http://c.tieba.baidu.com/p/3381631700
http://c.tieba.baidu.com/p/3381636465
http://c.tieba.baidu.com/p/3381641623
http://c.tieba.baidu.com/p/3381647911
http://c.tieba.baidu.com/p/3381652692
http://c.tieba.baidu.com/p/3381659223
http://c.tieba.baidu.com/p/3381665481
http://c.tieba.baidu.com/p/3381670808
http://c.tieba.baidu.com/p/3381681092
http://c.tieba.baidu.com/p/3381689738
http://c.tieba.baidu.com/p/3381700900
http://c.tieba.baidu.com/p/3381714244
http://c.tieba.baidu.com/p/3381710390
http://c.tieba.baidu.com/p/3381725415
http://c.tieba.baidu.com/p/3381714244
http://c.tieba.baidu.com/p/3381731047
http://c.tieba.baidu.com/p/3381740435
http://c.tieba.baidu.com/p/3381746335
http://c.tieba.baidu.com/p/3381763150
http://c.tieba.baidu.com/p/3381786671
http://c.tieba.baidu.com/p/3381772976
http://c.tieba.baidu.com/p/3381790411
http://c.tieba.baidu.com/p/3381798384
http://c.tieba.baidu.com/p/3381790411
http://c.tieba.baidu.com/p/3381802695
http://c.tieba.baidu.com/p/3381816588
http://c.tieba.baidu.com/p/3381827635
http://c.tieba.baidu.com/p/3381829846
http://c.tieba.baidu.com/p/3381839754
http://c.tieba.baidu.com/p/3381837540
http://c.tieba.baidu.com/p/3381841684
http://c.tieba.baidu.com/p/3381845458
http://c.tieba.baidu.com/p/3381848165
http://c.tieba.baidu.com/p/3381852442
http://c.tieba.baidu.com/p/3381855013
http://c.tieba.baidu.com/p/3381856722
http://c.tieba.baidu.com/p/3381859177
http://c.tieba.baidu.com/p/3381889912
http://c.tieba.baidu.com/p/3381890925
http://c.tieba.baidu.com/p/3381891956
http://c.tieba.baidu.com/p/3381895405
http://c.tieba.baidu.com/p/3381894440
http://c.tieba.baidu.com/p/3381895405
http://c.tieba.baidu.com/p/3381898614
http://c.tieba.baidu.com/p/3381901177
http://c.tieba.baidu.com/p/3381901970
http://c.tieba.baidu.com/p/3381902658
http://c.tieba.baidu.com/p/3381903076
http://c.tieba.baidu.com/p/3381905005
http://c.tieba.baidu.com/p/3381904283
http://c.tieba.baidu.com/p/3381903828
http://c.tieba.baidu.com/p/3381905537
http://c.tieba.baidu.com/p/3381906263
http://c.tieba.baidu.com/p/3381906656
http://c.tieba.baidu.com/p/3381908796
http://c.tieba.baidu.com/p/3381909192
http://c.tieba.baidu.com/p/3381909693
http://c.tieba.baidu.com/p/3381909520
http://c.tieba.baidu.com/p/3381909192
http://c.tieba.baidu.com/p/3381910900
http://c.tieba.baidu.com/p/3381911649
http://c.tieba.baidu.com/p/3381912081
http://c.tieba.baidu.com/p/3381912686
http://c.tieba.baidu.com/p/3381914209
http://c.tieba.baidu.com/p/3381914579
http://c.tieba.baidu.com/p/3381915369
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。