Kaynak: http://www.rizasirman.com/web_tasarim/asp-net/icerikten-html-kodu-temizleme-strip-html/
Ancak description bölümüne herhangi bir kodun karışması, sitenizin görüntüsünü ve çalışmasını olumsuz yönde etkileyecektir. Aşağıdaki kodu kullanarak HTML içeriklerinizi güvenilir, çıplak bir metin içeriği haline getirebilirsiniz.
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
/// <summary>
/// Converts an HTML fragment or document into plain text.
/// </summary>
/// <param name="source">The HTML markup to strip. May be null or empty.</param>
/// <returns>
/// The text content of <paramref name="source"/> with all tags removed, the
/// head, script and style sections dropped, a handful of named entities
/// decoded, line breaks represented as carriage returns and table cells as
/// tabs. Returns an empty string when <paramref name="source"/> is null or empty.
/// </returns>
/// <remarks>
/// The result is plain text, not safe HTML: the lt/gt entities are decoded to
/// literal angle brackets, so encode the value again before writing it into
/// an HTML page.
/// </remarks>
private static string StripHTML(string source)
{
    if (string.IsNullOrEmpty(source))
    {
        return string.Empty;
    }

    // Flatten the markup: source line breaks carry no meaning in HTML, and
    // tabs / carriage returns are reserved below as layout markers.
    string result = source.Replace("\r", " ").Replace("\n", " ").Replace("\t", string.Empty);
    result = StripHtmlRegexReplace(result, @"( )+", " ");

    // Normalise the <head> tags, then drop the whole section. The match is
    // lazy so that text between two separate sections is not swallowed.
    result = StripHtmlRegexReplace(result, @"<( )*head\b([^>])*>", "<head>");
    result = StripHtmlRegexReplace(result, @"(<( )*(/)( )*head( )*>)", "</head>");
    result = StripHtmlRegexReplace(result, @"(<head>).*?(</head>)", string.Empty);

    // Same treatment for every <script> section.
    result = StripHtmlRegexReplace(result, @"<( )*script([^>])*>", "<script>");
    result = StripHtmlRegexReplace(result, @"(<( )*(/)( )*script( )*>)", "</script>");
    result = StripHtmlRegexReplace(result, @"(<script>).*?(</script>)", string.Empty);

    // Same treatment for every <style> section.
    result = StripHtmlRegexReplace(result, @"<( )*style([^>])*>", "<style>");
    result = StripHtmlRegexReplace(result, @"(<( )*(/)( )*style( )*>)", "</style>");
    result = StripHtmlRegexReplace(result, @"(<style>).*?(</style>)", string.Empty);

    // Structural tags become layout characters: a tab per table cell, one
    // break per <br>/<li>, two breaks per <div>/<tr>/<p>. The \b keeps the
    // short names from matching longer tags such as <pre> or <track>.
    result = StripHtmlRegexReplace(result, @"<( )*td([^>])*>", "\t");
    result = StripHtmlRegexReplace(result, @"<( )*br( )*(/)?( )*>", "\r");
    result = StripHtmlRegexReplace(result, @"<( )*li( )*>", "\r");
    result = StripHtmlRegexReplace(result, @"<( )*div([^>])*>", "\r\r");
    result = StripHtmlRegexReplace(result, @"<( )*tr\b([^>])*>", "\r\r");
    result = StripHtmlRegexReplace(result, @"<( )*p\b([^>])*>", "\r\r");

    // Every remaining tag is removed outright.
    result = StripHtmlRegexReplace(result, @"<[^>]*>", string.Empty);

    // Decode the known entities. The already-decoded literal characters are
    // accepted as well so that previously handled input keeps its output.
    result = StripHtmlRegexReplace(result, @"&nbsp;|\u00A0", " ");
    result = StripHtmlRegexReplace(result, @"&bull;|\u2022", " * ");
    result = StripHtmlRegexReplace(result, @"&lsaquo;|\u2039", "<");
    result = StripHtmlRegexReplace(result, @"&rsaquo;|\u203A", ">");
    result = StripHtmlRegexReplace(result, @"&trade;|\u2122", "(tm)");
    result = StripHtmlRegexReplace(result, @"&frasl;|\u2044", "/");
    result = StripHtmlRegexReplace(result, @"&lt;", "<");
    result = StripHtmlRegexReplace(result, @"&gt;", ">");
    result = StripHtmlRegexReplace(result, @"&copy;|\u00A9", "(c)");
    result = StripHtmlRegexReplace(result, @"&reg;|\u00AE", "(r)");

    // Any other short entity is discarded.
    result = StripHtmlRegexReplace(result, @"&(.{2,6});", string.Empty);

    // Remove spaces trapped between layout characters and squeeze the tabs
    // that directly follow a line break.
    result = StripHtmlRegexReplace(result, "(\r)( )+(\r)", "\r\r");
    result = StripHtmlRegexReplace(result, "(\t)( )+(\t)", "\t\t");
    result = StripHtmlRegexReplace(result, "(\t)( )+(\r)", "\t\r");
    result = StripHtmlRegexReplace(result, "(\r)( )+(\t)", "\r\t");
    result = StripHtmlRegexReplace(result, "(\r)(\t)+(\r)", "\r\r");
    result = StripHtmlRegexReplace(result, "(\r)(\t)+", "\r\t");

    // Cap runs at two line breaks and four tabs in a single pass each.
    result = StripHtmlRegexReplace(result, "\r{3,}", "\r\r");
    result = StripHtmlRegexReplace(result, "\t{5,}", "\t\t\t\t");

    return result;
}

/// <summary>
/// Runs one case-insensitive regular-expression replacement for StripHTML.
/// </summary>
private static string StripHtmlRegexReplace(string input, string pattern, string replacement)
{
    return System.Text.RegularExpressions.Regex.Replace(
        input,
        pattern,
        replacement,
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);
}
Hiç yorum yok:
Yorum Gönder