C#网页采集
/// <summary>
/// Extracts the distinct values captured by the named group <c>key</c>
/// from every match of <paramref name="rex"/> in <paramref name="urlValue"/>.
/// </summary>
/// <param name="rex">
/// Regular expression pattern, applied case-insensitively. It is expected to
/// define a named group <c>(?&lt;key&gt;...)</c>; if it does not, each match
/// yields an empty string.
/// </param>
/// <param name="urlValue">Text to search.</param>
/// <returns>
/// The captured <c>key</c> values in order of first occurrence, with
/// duplicates removed. Empty when nothing matches.
/// </returns>
private string[] rexID(string rex, string urlValue)
{
    Regex pattern = new Regex(rex, RegexOptions.IgnoreCase);
    ArrayList keys = new ArrayList();

    foreach (Match match in pattern.Matches(urlValue))
    {
        // Read the group straight from the original match. Re-running the
        // pattern on the matched text (as the old code did, without
        // IgnoreCase) loses case-insensitive hits and can break anchors or
        // lookarounds that depend on the surrounding text.
        string key = match.Groups["key"].Value;

        // Filter duplicates by comparing what is actually stored (the key),
        // not the full match text.
        if (!keys.Contains(key))
        {
            keys.Add(key);
        }
    }

    return (string[])keys.ToArray(typeof(string));
}
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。