// Use htmlparser to extract every link address and link name from a piece of HTML code.

package parser;

 

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.HtmlPage;

 

/**

 *htmlparser取得一段html代码里面所有的链接地址和链接名称

 *

 *@author chenguoyong

 *

 */

public class Testhtmlparser {

 

       /**

        * @param args

        */

       publicstatic void main(String[] args) {

              Stringhtmlcode ="<HTML><HEAD><TITLE>AAA</TITLE></HEAD><BODY>"

                            +"<a href=‘http://topic.csdn.net/u/20080522/14/0ff402ef-c382-499a-8213-ba6b2f550425.html‘>连接1</a>"

                            +"<a href=‘http://topic.csdn.net‘>连接2</a></BODY></HTML>";

              //创建Parser对象根据传给字符串和指定的编码

              Parserparser = Parser.createParser(htmlcode, "GBK");

              //创建HtmlPage对象HtmlPage(Parser parser)

              HtmlPagepage = new HtmlPage(parser);

              try{

                     //HtmlPage extends visitor,Apply the given visitor to the current

                     //page.

                     parser.visitAllNodesWith(page);

              }catch (ParserException e1) {

                     e1= null;

              }

              //所有的节点

              NodeListnodelist = page.getBody();

              //建立一个节点filter用于过滤节点

              NodeFilterfilter = new TagNameFilter("A");

              //得到所有过滤后,想要的节点

              nodelist= nodelist.extractAllNodesThatMatch(filter, true);

              for(int i = 0; i < nodelist.size(); i++) {

                     LinkTaglink = (LinkTag) nodelist.elementAt(i);

                     //链接地址

                     System.out.println(link.getAttribute("href")+ "\n");

                     //链接名称

                     System.out.println(link.getStringText());

              }

 

       }

 http://c.tieba.baidu.com/p/3476776824
http://c.tieba.baidu.com/p/3476808306
http://c.tieba.baidu.com/p/3476798710
http://c.tieba.baidu.com/p/3474281354
http://c.tieba.baidu.com/p/3474300101
http://c.tieba.baidu.com/p/3474294075
http://c.tieba.baidu.com/p/3474123295
http://c.tieba.baidu.com/p/3474314242
http://c.tieba.baidu.com/p/3474310411
http://c.tieba.baidu.com/p/3474304550
http://c.tieba.baidu.com/p/3475433945
http://c.tieba.baidu.com/p/3475430015
http://c.tieba.baidu.com/p/3475433348
http://c.tieba.baidu.com/p/3475431434
http://c.tieba.baidu.com/p/3474176863
http://c.tieba.baidu.com/p/3474159835
http://c.tieba.baidu.com/p/3474163941
http://c.tieba.baidu.com/p/3474156121
http://c.tieba.baidu.com/p/3474147660
http://c.tieba.baidu.com/p/3474151899
http://c.tieba.baidu.com/p/3474142287
http://c.tieba.baidu.com/p/3474136965
http://c.tieba.baidu.com/p/3474133165
http://c.tieba.baidu.com/p/3474128675
http://c.tieba.baidu.com/p/3474103896
http://c.tieba.baidu.com/p/3474099488
http://c.tieba.baidu.com/p/3474094120
http://c.tieba.baidu.com/p/3475431976
http://c.tieba.baidu.com/p/3474267991
http://c.tieba.baidu.com/p/3474259583
http://c.tieba.baidu.com/p/3474254990
http://c.tieba.baidu.com/p/3474228986
http://c.tieba.baidu.com/p/3474221626
http://c.tieba.baidu.com/p/3474215742
http://c.tieba.baidu.com/p/3474212122
http://c.tieba.baidu.com/p/3474188883
http://c.tieba.baidu.com/p/3474207722
http://c.tieba.baidu.com/p/3474184143
http://c.tieba.baidu.com/p/3474180522
http://c.tieba.baidu.com/p/3474171022
http://c.tieba.baidu.com/p/3474086627
http://c.tieba.baidu.com/p/3462847203
http://c.tieba.baidu.com/p/3462839334
http://c.tieba.baidu.com/p/3462834294
http://c.tieba.baidu.com/p/3462786130
http://c.tieba.baidu.com/p/3462782768
http://c.tieba.baidu.com/p/3461791753
http://c.tieba.baidu.com/p/3461784215
http://c.tieba.baidu.com/p/3461778008
http://c.tieba.baidu.com/p/3461772860
http://c.tieba.baidu.com/p/3461767442
http://c.tieba.baidu.com/p/3461736231
http://c.tieba.baidu.com/p/3461704953
http://c.tieba.baidu.com/p/3461692676
http://c.tieba.baidu.com/p/3461665341
http://c.tieba.baidu.com/p/3461656389
http://c.tieba.baidu.com/p/3461660595
http://c.tieba.baidu.com/p/3461566608
http://c.tieba.baidu.com/p/3461652243
http://c.tieba.baidu.com/p/3461561596
http://c.tieba.baidu.com/p/3461557067



}

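// Disclaimer: where content on this site comes from the Internet or other media, its copyright belongs to the original media outlet and the article's author. It is reproduced to pass on information and for online sharing; this does not mean the site endorses its views or vouches for its accuracy, nor does it constitute any other advice.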