Description:-
In this example we explain how to convert between text
and HTML in ASP.NET with C#, and in particular how
to convert HTML to formatted plain text in ASP.NET.
Sometimes we need to display text from a file or database that contains HTML. This text may have been entered by a user and may not have been formatted for HTML. In these cases, the text must be converted.
.NET already provides the HttpUtility.HtmlEncode() method to encode special characters so that they appear as expected in a web browser. The problem is that this method does nothing about line breaks and paragraphs.
Sometimes we need to display text from a file or database that contains HTML. This text may have been entered by a user and may not have been formatted for HTML. In these cases, the text must be converted.
.NET already provides the HttpUtility.HtmlEncode() method to encode special characters so that they appear as expected in a web browser. The problem is that this method does nothing about line breaks and paragraphs.
So when your application needs to display unformatted text that contains multiple lines and paragraphs as formatted plain text on a web page, a little more work is required, which we describe below.
create awesome vertical menu using CSS Fancy vertical menu in CSS
<%@
Page Language="C#" AutoEventWireup="true" CodeFile="Default2.aspx.cs" Inherits="Default2" %>
<%-- Demo page backed by the Default2 code-behind class (Default2.aspx.cs). Its only content is Label1. --%>
<html xmlns="http://www.w3.org/1999/xhtml">
<head id="Head1" runat="server">
<title>How to
Convert HTML to Plain Text in asp.net </title>
</head>
<body>
<form id="form1" runat="server">
<%-- The Text="Label" default below is overwritten in Page_Load with the result of StripHTML. --%>
<asp:Label ID="Label1" runat="server" Text="Label"></asp:Label>
</form>
</body>
</html>
Default2.aspx.cs:-
using System.Data;
using
System.Data.SqlClient;
using
System.Configuration;
using System;
using
System.Web.UI.WebControls;
using
System.Text.RegularExpressions;
/// <summary>
/// Code-behind for Default2.aspx: converts a sample HTML fragment to
/// formatted plain text and displays it in <c>Label1</c>.
/// </summary>
public partial class Default2 : System.Web.UI.Page
{
    /// <summary>
    /// Strips the markup from a hard-coded HTML sample and shows the
    /// resulting plain text in <c>Label1</c>.
    /// </summary>
    /// <param name="sender">The page raising the event.</param>
    /// <param name="e">Unused event data.</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        string plainText = StripHTML(
            "<html><head></head><p>hi how r u</p><h1>this is heading</h1></html>");

        // StripHTML decodes entities such as &lt; back into '<', and
        // Label.Text is emitted without encoding. Encode the plain text so
        // it is displayed literally instead of being parsed as markup again.
        Label1.Text = System.Web.HttpUtility.HtmlEncode(plainText);
    }

    /// <summary>
    /// Converts an HTML document or fragment to formatted plain text.
    /// </summary>
    /// <remarks>
    /// Line breaks in the result are single carriage returns ('\r'); table
    /// cells are separated by tabs. At most two consecutive line breaks and
    /// four consecutive tabs are kept.
    /// </remarks>
    /// <param name="source">The HTML text to convert.</param>
    /// <returns>
    /// The plain-text rendering of <paramref name="source"/>; a null or
    /// empty <paramref name="source"/> is returned unchanged.
    /// </returns>
    private string StripHTML(string source)
    {
        if (string.IsNullOrEmpty(source))
        {
            return source;
        }

        const RegexOptions options = RegexOptions.IgnoreCase;

        // Source formatting is insignificant in HTML: browsers render line
        // breaks as a space, ignore tabs used for indentation and collapse
        // repeated spaces.
        string result = source
            .Replace("\r", " ")
            .Replace("\n", " ")
            .Replace("\t", string.Empty);
        result = Regex.Replace(result, @"( )+", " ");

        // Drop <head>, <script> and <style> together with their content.
        // The match is lazy so that text between two separate blocks
        // survives, and \b keeps e.g. <header> from being taken for <head>.
        foreach (string tag in new[] { "head", "script", "style" })
        {
            string pattern =
                @"<\s*" + tag + @"\b[^>]*>.*?<\s*/\s*" + tag + @"\s*>";
            result = Regex.Replace(result, pattern, string.Empty, options);
        }

        // Table cells become tabs.
        result = Regex.Replace(result, @"<\s*td\b[^>]*>", "\t", options);

        // <br> and <li> start a new line (also <br/>, <br />, <li class="...">).
        result = Regex.Replace(result, @"<\s*br\b[^>]*>", "\r", options);
        result = Regex.Replace(result, @"<\s*li\b[^>]*>", "\r", options);

        // <div>, <tr> and <p> start a new paragraph (double line break).
        result = Regex.Replace(result, @"<\s*div\b[^>]*>", "\r\r", options);
        result = Regex.Replace(result, @"<\s*tr\b[^>]*>", "\r\r", options);
        result = Regex.Replace(result, @"<\s*p\b[^>]*>", "\r\r", options);

        // Remove every remaining tag: links, images, comments, closing tags...
        result = Regex.Replace(result, @"<[^>]*>", string.Empty, options);

        // Translate character entities in a single pass so that the output of
        // one replacement (e.g. "&amp;lt;" -> "&lt;") is never decoded twice.
        // Only well-formed references of the length the original handled
        // (2 to 6 characters between '&' and ';') are touched.
        result = Regex.Replace(
            result,
            @"&(#[0-9a-z]{1,5}|[0-9a-z]{2,6});",
            new MatchEvaluator(ReplaceHtmlEntity),
            options);

        // Make line breaking consistent.
        result = result.Replace("\n", "\r");

        // Remove spaces stranded between line breaks and/or tabs. Lookarounds
        // leave the delimiters unconsumed, so chains like "\r \r \r" are
        // cleaned completely.
        result = Regex.Replace(result, "(?<=[\r\t]) +(?=[\r\t])", string.Empty);

        // Remove tabs sitting between two line breaks, then reduce any run of
        // tabs following a line break to a single tab.
        result = Regex.Replace(result, "(?<=\r)\t+(?=\r)", string.Empty);
        result = Regex.Replace(result, "(?<=\r)\t+", "\t");

        // Cap runs at two line breaks and four tabs.
        result = Regex.Replace(result, "\r{3,}", "\r\r");
        result = Regex.Replace(result, "\t{5,}", "\t\t\t\t");

        return result;
    }

    /// <summary>
    /// Maps one matched character entity to its plain-text replacement.
    /// </summary>
    /// <param name="match">
    /// A match whose first group holds the entity name without '&amp;' and ';'.
    /// </param>
    /// <returns>
    /// The replacement text, or an empty string for entities that have no
    /// plain-text equivalent here (including numeric references).
    /// </returns>
    private static string ReplaceHtmlEntity(Match match)
    {
        switch (match.Groups[1].Value.ToLowerInvariant())
        {
            case "nbsp":
                return " ";
            case "bull":
                return " * ";
            case "lsaquo":
            case "lt":
                return "<";
            case "rsaquo":
            case "gt":
                return ">";
            case "trade":
                return "(tm)";
            case "frasl":
                return "/";
            case "copy":
                return "(c)";
            case "reg":
                return "(r)";
            case "amp":
                return "&";
            case "quot":
                return "\"";
            default:
                return string.Empty;
        }
    }
}
0 comments:
Post a Comment