C#网页采集

  /// <summary>
        /// Collects the distinct values captured by the named group <c>key</c> across
        /// every match of <paramref name="rex"/> found in <paramref name="urlValue"/>.
        /// </summary>
        /// <param name="rex">
        /// Regular expression pattern, applied case-insensitively. It is expected to
        /// define a named group <c>key</c>; if the group is absent or did not take part
        /// in a match, that match contributes an empty string.
        /// </param>
        /// <param name="urlValue">Text to search.</param>
        /// <returns>
        /// The distinct captured values in order of first appearance; an empty array
        /// when nothing matches.
        /// </returns>
        private string[] rexID(string rex, string urlValue)
        {
            // Build the regex once; the options must stay the same for matching and
            // for group extraction, so the group is read from the original match
            // instead of re-running a second (case-sensitive) regex on the match text.
            Regex pattern = new Regex(rex, RegexOptions.IgnoreCase);

            // "seen" gives ordinal, constant-time duplicate detection; "keys" keeps
            // the order in which each value was first encountered.
            var seen = new System.Collections.Generic.HashSet<string>();
            var keys = new System.Collections.Generic.List<string>();

            foreach (Match match in pattern.Matches(urlValue))
            {
                string key = match.Groups["key"].Value;

                // De-duplicate on the value that is actually returned. Comparing the
                // full match text against the stored keys never filtered anything.
                if (seen.Add(key))
                {
                    keys.Add(key);
                }
            }

            return keys.ToArray();
        }

郑重声明：本站内容如果来自互联网及其他传播媒体，其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享，并不代表本站赞同其观点和对其真实性负责，也不构成任何其他建议。