C#用正则表达式去掉Html中的script脚本和html标签
原文 C#用正则表达式去掉Html中的script脚本和html标签
/// <summary>
/// Removes script blocks and HTML tags from a string and decodes HTML
/// entities, producing plain text.
/// </summary>
/// <param name="Htmlstring">The HTML text to clean. May be null or empty.</param>
/// <returns>
/// The trimmed plain text with script blocks, tags, comments, angle brackets
/// and CRLF sequences removed; an empty string when the input is null or empty.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    // Regex.Replace throws on null input; treat "nothing to clean" as empty text.
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script blocks. Singleline makes '.' match newlines, so a script
    // whose body spans several lines is removed together with its content
    // instead of only losing its opening and closing tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove HTML tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Remove a line break together with the whitespace that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

    // Remove comment remnants that the tag pattern above did not consume.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode common entities (named form or decimal numeric form).
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);

    // Drop any other decimal numeric entity.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // Decode whatever entities remain, then remove <br> tags that were
    // entity-escaped in the input and only became visible after decoding.
    Htmlstring = HttpUtility.HtmlDecode(Htmlstring)
        .Replace("<br/>", "")
        .Replace("<br>", "");

    // string.Replace returns a new string, so the result must be assigned.
    // This runs after the <br> removal so an escaped "<br>" is removed as a
    // whole rather than being reduced to the text "br".
    Htmlstring = Htmlstring
        .Replace("<", "")
        .Replace(">", "")
        .Replace("\r\n", "");

    return Htmlstring.Trim();
}
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。