iOS开发之html解析

使用XPath解析html

可以从 https://github.com/topfunky/hpple 下载工程,将 TFHpple.h、TFHpple.m、TFHppleElement.h、TFHppleElement.m、XPathQuery.h、XPathQuery.m 加到自己的项目中,并在 Frameworks 中导入 libxml2.dylib(较新版本的 Xcode 中为 libxml2.tbd)


 
在项目中找到Other Linker Flags,加入-lxml2


在项目中找到Header Search Paths,加入/usr/include/libxml2(如果编译时找不到libxml头文件,可改用$(SDKROOT)/usr/include/libxml2)
代码如下:
 // Downloads the news index page, converts it to UTF-8, and appends the title
 // and absolute URL of a fixed window of <a> elements to currentNewsArr and
 // currentHrefArr. Written for manual retain/release (non-ARC), so the objects
 // created with alloc/init are released at the end on every path.
 //
 // NOTE: -initWithContentsOfURL: is synchronous and blocks the calling thread
 // for the duration of the download.
 NSString *urlString = @"http://www.xiyou.edu.cn/new/lm.jsp?urltype=tree.TreeTempUrl&wbtreeid=724";
 NSData *htmlData = [[NSData alloc] initWithContentsOfURL:[NSURL URLWithString:urlString]];

 // Re-encode the page as UTF-8 before handing it to the parser.
 NSData *toHtmlData = [self toUTF8:htmlData];

 // Only build a parser when there is something to parse; messaging a nil
 // parser below simply yields a nil (empty) result array.
 TFHpple *xpathParser = nil;
 if (toHtmlData != nil) {
     xpathParser = [[TFHpple alloc] initWithHTMLData:toHtmlData];
 }

 NSArray *aArray = [xpathParser searchWithXPathQuery:@"//a"];

 // Take 15 <a> elements starting at index 87. The end index is clamped to the
 // array size so a page with fewer anchors cannot raise NSRangeException; when
 // the page has 87 anchors or fewer the loop body never runs.
 const NSUInteger firstIndex = 87;
 const NSUInteger wantedCount = 15;
 const NSUInteger endIndex = MIN(firstIndex + wantedCount, [aArray count]);

 for (NSUInteger i = firstIndex; i < endIndex; i++) {
     TFHppleElement *aElement = [aArray objectAtIndex:i];

     // The title is the content of the node three levels below the <a>
     // (first child of first child of first child). -firstObject returns nil
     // for an empty array, and messaging nil returns nil, so a differently
     // shaped anchor results in aStr == nil instead of an exception.
     TFHppleElement *aEle = [[aElement children] firstObject];
     TFHppleElement *aChildEle = [[aEle children] firstObject];
     NSString *aStr = [[[aChildEle children] firstObject] content];
     NSLog(@"aStr:%@", aStr);

     // Read the attributes of the <a> element itself.
     NSDictionary *aAttributeDict = [aElement attributes];
     NSLog(@"aAttributeDict:%@", aAttributeDict);

     NSString *href = [aAttributeDict objectForKey:@"href"];
     if (aStr == nil || href == nil) {
         // -addObject: raises on nil, and a link without an href is useless;
         // skip the entry so the two arrays stay index-aligned.
         continue;
     }

     // The href is appended to the site root to form an absolute URL.
     NSString *hrefStr = [NSString stringWithFormat:@"http://www.xiyou.edu.cn%@", href];
     NSLog(@"hrefStr:%@", hrefStr);

     [currentNewsArr addObject:aStr];
     [currentHrefArr addObject:hrefStr];
 }

 // Balance the alloc/init calls above regardless of whether any link was found.
 [xpathParser release];
 [htmlData release];
40 
// If the page being parsed is not UTF-8 encoded (for example GBK), convert it
// to UTF-8 first and then parse the converted data.

/**
 * Re-encodes GBK/GB18030 HTML data as UTF-8.
 *
 * The bytes are decoded as GB18030, the page's charset declaration is
 * rewritten from GBK to UTF-8 so that it agrees with the new byte encoding,
 * and the resulting string is encoded as UTF-8.
 *
 * @param sourceData Raw HTML bytes encoded as GBK/GB18030.
 * @return The UTF-8 encoded HTML, or nil when sourceData is nil or empty or
 *         its bytes cannot be decoded.
 */
- (NSData *)toUTF8:(NSData *)sourceData {
    if ([sourceData length] == 0) {
        // Nothing to decode (this also covers a nil sourceData, e.g. a failed download).
        return nil;
    }

    CFStringRef gbkStr = CFStringCreateWithBytes(NULL,
                                                 [sourceData bytes],
                                                 (CFIndex)[sourceData length],
                                                 kCFStringEncodingGB_18030_2000,
                                                 false);
    if (gbkStr == NULL) {
        return nil;
    }

    // Adjust these two literals to match the charset declaration in the page
    // source; here the declaration is changed from GBK to UTF-8. The match is
    // exact, so a page that spells the tag differently is left unchanged.
    // __bridge is a plain cast with no ownership transfer, valid with and
    // without ARC.
    NSString *utf8String =
        [(__bridge NSString *)gbkStr
            stringByReplacingOccurrencesOfString:@"META http-equiv=\"Content-Type\" content=\"text/html; charset=GBK\""
                                      withString:@"META http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\""];
    NSData *utf8Data = [utf8String dataUsingEncoding:NSUTF8StringEncoding];

    // CFStringCreateWithBytes follows the Create Rule: this method owns gbkStr
    // and must release it once the derived data has been produced.
    CFRelease(gbkStr);

    return utf8Data;
}

 

http://blog.csdn.net/majiakun1/article/details/39472489

郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。