过滤除表格以外的所有 HTML 代码，如何实现？

来源:百度知道 编辑:UC知道 时间:2024/08/20 18:07:07
过滤除表格以外的所有 HTML 代码，如何实现？

下面的方法有些问题，请帮我修改或补充一下，谢谢！

public static string NoHTML(string Htmlstring)
{
//删除脚本
Htmlstring=Htmlstring.Replace("'", "'");
Htmlstring = Regex.Replace(Htmlstring, @" <script[^>]*?>.*? </script>", "",RegexOptions.IgnoreCase);
//删除HTML
Htmlstring = Regex.Replace(Htmlstring, @" <(.[^(table|tr|td|>)]*)>", "",RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "",RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @" <!--.*", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", &

public static string NoHTML(string Htmlstring)
{
//删除脚本
Htmlstring=Htmlstring.Replace("'", "'");
Htmlstring = Regex.Replace(Htmlstring, @" <script[^>]*?>.*? </script>", "",RegexOptions.IgnoreCase);
//删除HTML 下面一行删除,涉及到表格里面的table tr td
//Htmlstring = Regex.Replace(Htmlstring, @" <(.[^(table|tr|td|>)]*)>", "",RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "",RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @" <!--.*", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"",RegexOptions.Ign