网页内容扫描器
程序简介
使用RestSharp即时获取目标网页的内容,使用正则表达式提取需要识别的内容并对比结果。
c#正则表达式的[贪婪]模式
正则表达式引擎默认是贪婪的:只要模式允许,它就会匹配尽可能多的字符。
如何匹配满足条件的最短字符 ?
在量词(如 *、+)后面加上“?”,即可把匹配模式改为非贪婪(懒惰)模式。例如,对文本 <b>a</b><b>b</b>,模式 <b>(.*)</b> 的分组会捕获 a</b><b>b,而 <b>(.*?)</b> 只捕获 a。
代码
主窗体代码
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using Wsion.ClassLib;
using RestSharp;
using System.Net;
using System.Text.RegularExpressions;
using System.Threading;

namespace WebpageScanner
{
    /// <summary>
    /// Main window of the scanner. While a scan is running, a worker thread
    /// periodically downloads the page at the configured URL, extracts the first
    /// capture group of the configured regular expression, appends it to the log
    /// and launches the configured alert program once the extracted text differs
    /// from the expected target text.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Pause between two consecutive scans, in milliseconds.</summary>
        private const int ScanIntervalMilliseconds = 2000;

        /// <summary>Worker thread of the current (or most recent) scan.</summary>
        private Thread thread = null;

        /// <summary>
        /// True while the worker should keep scanning. Written by both the UI
        /// thread and the worker, polled by the worker; the worker never reads
        /// control state directly to decide whether to continue.
        /// </summary>
        private volatile bool scanning = false;

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Starts scanning on a background thread.</summary>
        private void button1_Click(object sender, EventArgs e)
        {
            // Publish the "running" state BEFORE the worker starts; otherwise the
            // worker could observe the old state and exit immediately.
            button1.Enabled = false;
            scanning = true;

            // Assign the field (not a shadowing local) so the form keeps a handle
            // on its worker. A background thread never keeps the process alive
            // after the window has been closed.
            thread = new Thread(new ThreadStart(process));
            thread.IsBackground = true;
            thread.Start();
        }

        /// <summary>Clears the log.</summary>
        private void button2_Click(object sender, EventArgs e)
        {
            textBoxLog.Text = string.Empty;
        }

        /// <summary>
        /// Requests the worker to stop. The worker notices the request the next
        /// time it checks the flag, i.e. after at most one scan interval.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            scanning = false;
            button1.Enabled = true;
        }

        /// <summary>
        /// Worker loop: scan, log, compare with the target text, sleep, repeat
        /// until a stop is requested or the extracted text differs from the target.
        /// </summary>
        private void process()
        {
            try
            {
                while (scanning)
                {
                    string result;
                    try
                    {
                        // The inputs are re-read on every pass so that edits made
                        // while scanning take effect on the next pass.
                        result = FetchMatch(GetText(textBoxUrl), GetText(textBoxRegex));
                    }
                    catch (UriFormatException ex)
                    {
                        // Malformed URL: retrying cannot succeed, so stop.
                        StopAfterError(ex.Message);
                        return;
                    }
                    catch (ArgumentException ex)
                    {
                        // Invalid regular expression (or null/invalid argument).
                        StopAfterError(ex.Message);
                        return;
                    }

                    if (result == null)
                    {
                        // No response body (e.g. network failure): log and retry
                        // on the next pass instead of crashing or raising an alert.
                        AppendLog("(no response body received)");
                    }
                    else
                    {
                        AppendLog(result);

                        if (result != GetText(textBoxTarget))
                        {
                            scanning = false;
                            Alert();
                            SetEnabled(button1, true);
                            return;
                        }
                    }

                    Thread.Sleep(ScanIntervalMilliseconds);
                }
            }
            catch (InvalidOperationException)
            {
                // Control.Invoke throws this (or its subclass
                // ObjectDisposedException) when the form is closed while the
                // worker is still running; there is nothing left to update.
            }
        }

        /// <summary>
        /// Downloads <paramref name="url"/> and returns the first capture group of
        /// <paramref name="pattern"/> with line breaks replaced by spaces and
        /// surrounding whitespace trimmed.
        /// </summary>
        /// <returns>
        /// The extracted text (empty when the pattern does not match), or
        /// <c>null</c> when the response carried no body.
        /// </returns>
        /// <exception cref="UriFormatException">The URL is malformed.</exception>
        /// <exception cref="ArgumentException">The pattern is not a valid regular expression.</exception>
        private string FetchMatch(string url, string pattern)
        {
            RestClient client = new RestClient();
            client.BaseUrl = new Uri(url);
            // To go through a proxy:
            // client.Proxy = new WebProxy("proxy.net", 8080);
            // client.Proxy.Credentials = System.Net.CredentialCache.DefaultCredentials;

            RestRequest request = new RestRequest();
            request.Method = Method.GET;
            IRestResponse response = client.Execute(request);

            byte[] rawBytes = response.RawBytes;
            if (rawBytes == null)
            {
                return null;
            }

            string body = EncodingHelper.Instance.BytesToUnicode(rawBytes);

            // Flags are combined with '|'. '&' of two distinct flags is
            // RegexOptions.None, which silently disabled both options.
            Regex regex = new Regex(pattern, RegexOptions.Multiline | RegexOptions.IgnoreCase);
            return regex.Match(body).Groups[1].ToString()
                .Replace('\n', ' ')
                .Replace('\r', ' ')
                .Trim();
        }

        /// <summary>Stops the scan, logs the reason and re-enables the start button.</summary>
        private void StopAfterError(string message)
        {
            scanning = false;
            AppendLog(message);
            SetEnabled(button1, true);
        }

        /// <summary>
        /// Signals that the scanned content changed by launching the program or
        /// document configured in the path text box.
        /// </summary>
        private void Alert()
        {
            string path = GetText(textBoxPath);
            try
            {
                System.Diagnostics.Process.Start(path);
            }
            catch (Win32Exception ex)
            {
                // The file could not be opened.
                AppendLog(ex.Message);
            }
            catch (System.IO.FileNotFoundException ex)
            {
                AppendLog(ex.Message);
            }
            catch (InvalidOperationException ex)
            {
                // No file name was specified.
                AppendLog(ex.Message);
            }
        }

        #region SetVal Template
        /*
         * Helpers for reading and changing control properties from another thread.
         */
        delegate void SetValueHandler<T>(Control control, T val);

        /// <summary>Sets <c>control.Text</c>, marshalling to the UI thread when required.</summary>
        private void SetText(Control control, string val)
        {
            if (control.InvokeRequired)
            {
                SetValueHandler<string> handler = new SetValueHandler<string>(SetText);
                control.Invoke(handler, new object[] { control, val });
            }
            else
            {
                control.Text = val;
            }
        }

        /// <summary>Sets <c>control.Enabled</c>, marshalling to the UI thread when required.</summary>
        private void SetEnabled(Control control, bool val)
        {
            if (control.InvokeRequired)
            {
                SetValueHandler<bool> handler = new SetValueHandler<bool>(SetEnabled);
                control.Invoke(handler, new object[] { control, val });
            }
            else
            {
                control.Enabled = val;
            }
        }

        /// <summary>Reads <c>control.Text</c>, marshalling to the UI thread when required.</summary>
        private string GetText(Control control)
        {
            if (control.InvokeRequired)
            {
                return (string)control.Invoke(new Func<Control, string>(GetText), new object[] { control });
            }

            return control.Text;
        }

        /// <summary>
        /// Appends a time-stamped entry to the log. The read-modify-write of the
        /// log text happens entirely on the UI thread, so it cannot interleave
        /// with the "clear log" button.
        /// </summary>
        private void AppendLog(string message)
        {
            if (textBoxLog.InvokeRequired)
            {
                textBoxLog.Invoke(new Action<string>(AppendLog), new object[] { message });
            }
            else
            {
                textBoxLog.Text = textBoxLog.Text + string.Format("[{0}]{1}{2}{1}{1}", DateTime.Now.ToLongTimeString(), Environment.NewLine, message);
            }
        }
        #endregion
    }
}
以下为编码转换器部分
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Wsion.ClassLib
{
    /// <summary>
    /// Encoding converter: helpers for passing text through character encodings.
    /// </summary>
    public class EncodingHelper
    {
        /// <summary>Shared, ready-to-use instance.</summary>
        public static readonly EncodingHelper Instance = new EncodingHelper();

        /// <summary>
        /// Encodes <paramref name="fromString"/> with <paramref name="fromEncoding"/>,
        /// transcodes the resulting bytes to <paramref name="toEncoding"/> and decodes
        /// them with <paramref name="toEncoding"/>.
        /// </summary>
        /// <param name="fromString">Text to convert.</param>
        /// <param name="fromEncoding">Encoding used to turn the text into bytes.</param>
        /// <param name="toEncoding">Encoding the bytes are transcoded to and decoded with.</param>
        /// <returns>The string decoded from the transcoded bytes.</returns>
        public string EncodingConvert(string fromString, Encoding fromEncoding, Encoding toEncoding)
        {
            byte[] transcoded = Encoding.Convert(fromEncoding, toEncoding, fromEncoding.GetBytes(fromString));
            return toEncoding.GetString(transcoded);
        }

        /// <summary>Runs <see cref="EncodingConvert"/> from "gb2312" to UTF-8.</summary>
        public string GB2312ToUtf8(string gb2312String)
        {
            return EncodingConvert(gb2312String, Encoding.GetEncoding("gb2312"), Encoding.UTF8);
        }

        /// <summary>Runs <see cref="EncodingConvert"/> from UTF-8 to "gb2312".</summary>
        public string Utf8ToGB2312(string utf8String)
        {
            return EncodingConvert(utf8String, Encoding.UTF8, Encoding.GetEncoding("gb2312"));
        }

        /// <summary>Runs <see cref="EncodingConvert"/> from "gbk" to <see cref="Encoding.Unicode"/>.</summary>
        public string GbkToUnicode(string gbkString)
        {
            return EncodingConvert(gbkString, Encoding.GetEncoding("gbk"), Encoding.Unicode);
        }

        /// <summary>
        /// Decodes <paramref name="bytes"/> into a string using the "gbk" encoding.
        /// </summary>
        /// <param name="bytes">Raw bytes to decode.</param>
        /// <returns>The decoded string.</returns>
        public string BytesToUnicode(byte[] bytes)
        {
            return Encoding.GetEncoding("gbk").GetString(bytes);
        }
    }
}
目前多线程Thread.Abort()时程序偶尔会崩溃,希望得到指正。
源码下载地址:
http://files.cnblogs.com/files/wsion/WebpageScanner.7z
原创博文,转载请注明出处
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。