Willem's...

{rue if I mellow}

  Home  |   Contact  |   Syndication    |   Login
  25 Posts | 0 Stories | 120 Comments | 53 Trackbacks

News

Archives

Post Categories

Businessware Architects

XML-FX.COM

If you post code to a blog, your own website, or a developer site, you often need to embed code excerpts in your writing. The easiest solution is to use an HTML generator that renders your source code as HTML.

This has already been done - many times - but most of these solutions rely on CSS classes or style tags. Although elegant, the generated HTML is often not suitable for publishing without modification. A comprehensive example of this type of solution is here:

Instead, a solution should be used that 'hard-codes' the style and other tags directly into the HTML.  I found such a code-class here:

The code-class is written in VB.NET. I used CSharpener to convert the code to C# (hmmm....only because I prefer C#), and then modified the code to produce something which is crude (and slightly buggy), but very effective.

You can test the tool here:

You can download my converter class (C#) here:

The CodeToHTML class is shown below, using the (unmodified) tool output:

CodeToHTML.cs
using System;
using System.Web;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.Data;
using System.IO;
namespace CodeFormatter
{
  // Holds the attribute text of an HTML <font> element and builds the
  // matching opening and closing tags.
  public class FontStyle
  {
    // Raw attribute text placed inside the opening tag (for example
    // color="blue"); when null the opening tag carries no attributes.
    public string Style = null;

    public FontStyle()
    {
    }

    public FontStyle(string Style)
    {
      this.Style = Style;
    }

    // Opening tag: "<font>" when no attributes are set, otherwise
    // "<font " followed by the attribute text and ">".
    public string BeginTag
    {
      get
      {
        if(Style == null)
        {
          return "<font>";
        }
        return "<font " + Style + ">";
      }
    }

    // Closing tag; always the same literal.
    public string EndTag
    {
      get { return "</font>"; }
    }
  }
  // Describes the wrapper emitted around a rendered listing: a table with a
  // single cell containing a <pre> block and a base font.
  public class PageStyle
  {
    // Base font applied to the whole listing.
    public FontStyle Font = new FontStyle("color=\"black\" face=\"Courier New\" size=\"2\"");

    // Raw attribute text for the <table> element; null emits a bare tag.
    public string Style = null;

    public PageStyle()
    {
    }

    public PageStyle(string Style)
    {
      this.Style = Style;
    }

    // Markup that opens the table, row, cell, <pre> block and base font.
    public string BeginTag
    {
      get
      {
        string tableTag;
        if(Style == null)
        {
          tableTag = "<table>";
        }
        else
        {
          tableTag = "<table " + Style + ">";
        }
        return tableTag + "<tr><td><pre>" + Font.BeginTag;
      }
    }

    // Markup that closes everything BeginTag opened, in reverse order.
    public string EndTag
    {
      get { return Font.EndTag + "</pre></td></tr></table>"; }
    }
  }
  public class CodeToHTML
  {
    // Bundle of styles used while rendering: the page wrapper plus the fonts
    // applied to comments, keywords and markup tag names.
    public class CodeStyle
    {
      public PageStyle Page;
      public FontStyle Comment;
      public FontStyle Keyword;
      public FontStyle XmlTag;

      // Installs the default colour scheme.
      public CodeStyle()
      {
        Page = new PageStyle();
        Comment = new FontStyle("color=\"green\"");
        Keyword = new FontStyle("color=\"blue\"");
        XmlTag = new FontStyle("color=\"maroon\"");
      }
    }
    // Backing state for the public configuration properties below.
    private bool showFileName = false;
    private string language = "";
    private CodeStyle codeStyle = new CodeStyle();
    private int tabSize = 4;

    // Styles (page wrapper and fonts) used when rendering.
    public CodeStyle Style
    {
      get { return codeStyle; }
      set { codeStyle = value; }
    }

    // Number of spaces that stands for one tab when re-tabbing or rendering.
    public int TabSize
    {
      get { return tabSize; }
      set { tabSize = value; }
    }

    // Stored flag; the rendering code in this class does not read it.
    public bool ShowFileName
    {
      get { return showFileName; }
      set { showFileName = value; }
    }

    // Language key that selects the highlighter used by Render.
    public string Language
    {
      get { return language; }
      set { language = value; }
    }
    // Derives the language key from the file extension. The key is stored
    // only when the extension (lower-cased, dots trimmed) is one of the
    // known language keys; otherwise the current language is left alone.
    private void SetLanguageFromFileName(string filePath)
    {
      string extension = new FileInfo(filePath).Extension;
      string key = extension.ToLower().Trim(new char[]{'.'});
      bool isKnown = key == ProgrammingLanguage.VB
        || key == ProgrammingLanguage.CSharp
        || key == ProgrammingLanguage.JSharp;
      if(isKnown)
      {
        language = key;
      }
    }
    // Renders the file at filePath to HTML and returns the markup.
    // The language is chosen from the file extension when it is recognised.
    // Exceptions from opening or reading the file propagate to the caller.
    public string RenderFile(string filePath)
    {
      this.SetLanguageFromFileName(filePath);
      // Dispose the reader so the file handle is released even when
      // rendering throws; the original never closed it.
      using(StreamReader reader = File.OpenText(filePath))
      {
        return Render(reader);
      }
    }
    // Renders the file at filePath to HTML and writes the markup to
    // outputFilePath, replacing any existing file.
    // Exceptions from reading or writing propagate to the caller.
    public void RenderFile(string filePath, string outputFilePath)
    {
      this.SetLanguageFromFileName(filePath);
      // Read and render first so a failure on the input side cannot leave a
      // truncated output file behind, and so the reader is always closed.
      string html;
      using(StreamReader reader = File.OpenText(filePath))
      {
        html = Render(reader);
      }
      // Disposing the writer flushes and closes it, also on exceptions.
      using(StreamWriter writer = new StreamWriter(outputFilePath))
      {
        writer.Write(html);
      }
    }
    // Normalises the whitespace of a block of source text, line by line:
    // trailing spaces are removed, every run of TabSize spaces becomes a tab
    // character, and any spaces still leading the line are dropped.
    // Returns the processed lines, each terminated by the platform newline.
    public string RetabAndTrim(string inputString)
    {
      // A non-positive TabSize gives no run of spaces to look for; searching
      // for an empty string would make String.Replace throw, so re-tabbing
      // is skipped in that case.
      string tab = (tabSize > 0) ? new string(' ', tabSize) : null;

      // Collapse every line-ending convention to a single '\r'. The first two
      // replacements keep the original CRLF / LFCR handling; the third makes
      // LF-only (Unix) input split into lines instead of being treated as
      // one long line.
      inputString = inputString.Replace("\r\n", "\r");
      inputString = inputString.Replace("\n\r", "\r");
      inputString = inputString.Replace("\n", "\r");
      string[] lines = inputString.Split(new char[]{'\r'});

      StringWriter writer = new StringWriter();
      foreach(string line in lines)
      {
        string output = line.TrimEnd(new char[]{'\n','\r',' '});
        if(tab != null)
        {
          output = output.Replace(tab, "\t");
        }
        output = output.TrimStart(new char[]{' '});
        writer.WriteLine(output);
      }
      return writer.ToString();
    }
    // Reads filePath, passes its text through RetabAndTrim and writes the
    // result to outputFilePath, replacing any existing file.
    // Exceptions from reading or writing propagate to the caller.
    public void RetabAndTrimFile(string filePath, string outputFilePath)
    {
      string output;
      // The reader is disposed before the writer is opened, so the input and
      // output paths may refer to the same file, as before.
      using(StreamReader reader = File.OpenText(filePath))
      {
        output = this.RetabAndTrim(reader.ReadToEnd());
      }
      // Disposing the writer flushes and closes it, also on exceptions.
      using(StreamWriter writer = new StreamWriter(outputFilePath))
      {
        writer.Write(output);
      }
    }
    // Reads the remaining content of textReader and renders it as HTML.
    // The reader is not closed here.
    public string Render(StreamReader textReader)
    {
      string sourceText = textReader.ReadToEnd();
      return Render(sourceText);
    }
    // Renders source text as HTML wrapped in the configured page style.
    //
    // When Language is one of the known language keys every line is
    // highlighted with that language's rules. Any other value makes the input
    // be treated as an ASP.NET page: markup lines are coloured as tags and
    // the lines inside server-side script blocks are highlighted with the
    // language detected from the page itself.
    //
    // Returns the generated markup. The configured Language is not modified.
    public string Render(string inputString)
    {
      // A non-positive TabSize expands tabs to nothing.
      string tab = (tabSize > 0) ? new string(' ', tabSize) : "";

      // Collapse every line-ending convention to '\r'. The third replacement
      // makes LF-only (Unix) input split into lines as well.
      inputString = inputString.Replace("\r\n", "\r");
      inputString = inputString.Replace("\n\r", "\r");
      inputString = inputString.Replace("\n", "\r");
      inputString = inputString.Replace("\t", tab);
      string[] lines = inputString.Split(new char[]{'\r'});

      // Language may have been set to null through the property.
      string configured = (language == null) ? "" : language.Trim().ToLower();

      StringWriter writer = new StringWriter();
      writer.Write(codeStyle.Page.BeginTag);
      switch(configured)
      {
        case ProgrammingLanguage.CSharp:
        case ProgrammingLanguage.JSharp:
        case ProgrammingLanguage.VB:
          foreach(string line in lines)
          {
            writer.WriteLine(this.FixScriptLine(line, configured));
          }
          break;
        default:
          this.RenderAspxLines(lines, writer);
          break;
      }
      writer.Write(codeStyle.Page.EndTag);
      return writer.ToString();
    }

    // Highlights one line of code with the rules for scriptLanguage.
    // Unrecognised (or null) languages fall back to the VB rules.
    private string FixScriptLine(string line, string scriptLanguage)
    {
      string key = (scriptLanguage == null) ? "" : scriptLanguage.Trim().ToLower();
      switch(key)
      {
        case ProgrammingLanguage.CSharp:
          return this.FixCSLine(line);
        case ProgrammingLanguage.JSharp:
          return this.FixJSLine(line);
        default:
          return this.FixVBLine(line);
      }
    }

    // Writes the lines of an ASP.NET page, tracking whether the current line
    // lies inside a server-side script block or inside a server tag that
    // spans several lines.
    private void RenderAspxLines(string[] lines, TextWriter writer)
    {
      // The detected language is kept in a local so that rendering one page
      // does not change how the next Render call on this instance behaves.
      string currentLanguage = language;
      bool isInScriptBlock = false;
      bool isInMultiLine = false;
      foreach(string line in lines)
      {
        currentLanguage = this.GetLanguageFromLine(line, currentLanguage);
        if(this.IsScriptBlockTagStart(line))
        {
          writer.WriteLine(this.FixASPXLine(line));
          isInScriptBlock = true;
        }
        else if(this.IsScriptBlockTagEnd(line))
        {
          writer.WriteLine(this.FixASPXLine(line));
          isInScriptBlock = false;
        }
        else if(!isInMultiLine && this.IsMultiLineTagStart(line))
        {
          // Multi-line tags are written without line terminators, exactly as
          // before, so the pieces of the tag end up on one output line.
          writer.Write("<font color=blue><b>" +
            HttpUtility.HtmlEncode(line));
          isInMultiLine = true;
        }
        else if(isInMultiLine && this.IsMultiLineTagEnd(line))
        {
          writer.Write(HttpUtility.HtmlEncode(line) +
            "</b></font>");
          isInMultiLine = false;
        }
        else if(isInMultiLine)
        {
          writer.Write(HttpUtility.HtmlEncode(line));
        }
        else if(isInScriptBlock)
        {
          writer.WriteLine(this.FixScriptLine(line, currentLanguage));
        }
        else
        {
          writer.WriteLine(this.FixASPXLine(line));
        }
      }
    }
    // Looks for a language declaration on one line of an ASP.NET page: a
    // Page directive, a server-side script tag, or a WebService directive.
    //
    // line:        the raw (not HTML-encoded) source line.
    // defaultLang: the value returned when the line declares nothing.
    //
    // Returns the language key. Recognised aliases of C#, VB and JScript are
    // mapped to the keys declared in ProgrammingLanguage; any other value is
    // returned as found. An empty line returns defaultLang unchanged.
    private string GetLanguageFromLine(string line, string defaultLang)
    {
      string returnString = defaultLang;
      if(line.Length == 0)
      {
        return returnString;
      }
      Match langMatch = Regex.Match(line,
        @"(?i)<%@\s*Page\s*.*Language\s*=\s*""(?<lang>[^""]+)""");
      if(langMatch.Success)
      {
        returnString = langMatch.Groups["lang"].ToString();
      }
      langMatch = Regex.Match(line,
        @"(?i)(?=.*runat\s*=\s*""?server""?)" +
        @"<script.*language\s*=\s*""(?<lang>[^""]+)"".*>");
      if(langMatch.Success)
      {
        returnString = langMatch.Groups["lang"].ToString();
      }
      langMatch = Regex.Match(line,
        @"(?i)<%@\s*WebService\s*.*Language\s*=\s*""?(?<lang>[^""]+)""?" );
      if(langMatch.Success)
      {
        returnString = langMatch.Groups["lang"].ToString();
      }
      // Pages usually declare the language by an alias such as "C#" or
      // "JScript". The original only recognised the exact text "CS", so the
      // other spellings fell through to the VB highlighter.
      if(returnString != null)
      {
        string alias = returnString.Trim().ToLower(
          System.Globalization.CultureInfo.InvariantCulture);
        switch(alias)
        {
          case "c#":
          case "cs":
          case "csharp":
            returnString = ProgrammingLanguage.CSharp;
            break;
          case "vb":
          case "vbs":
          case "visualbasic":
          case "vbscript":
            returnString = ProgrammingLanguage.VB;
            break;
          case "js":
          case "jscript":
          case "javascript":
            returnString = ProgrammingLanguage.JSharp;
            break;
        }
      }
      return returnString;
    }
    // Highlights one line of C# source: the line is HTML-encoded, keywords
    // that are not preceded by "//" on the line are wrapped in the keyword
    // font, and everything from "//" to the end of the line is wrapped in
    // the comment font. Empty lines are returned unchanged.
    private string FixCSLine(string line)
    {
      if(line.Length == 0)
      {
        return line;
      }

      string[] keywords =
      {
        "private", "protected", "public", "namespace", "class",
        "break", "for", "if", "else", "while", "switch", "case",
        "using", "return", "null", "void", "int", "bool", "string",
        "float", "this", "new", "true", "false", "const", "static", "base",
        "foreach", "in", "try", "catch", "get", "set", "char", "default"
      };

      // Tabs become single spaces before the text is encoded.
      string encoded = HttpUtility.HtmlEncode(Regex.Replace(line, @"(?i)(\t)", " "));

      string keywordPattern =
        @"\b" + "(?<keyword>" + string.Join("|", keywords) + ")" + @"\b(?<!//.*)";
      string keywordReplacement =
        codeStyle.Keyword.BeginTag + "${keyword}" + codeStyle.Keyword.EndTag;
      string withKeywords = Regex.Replace(encoded, keywordPattern, keywordReplacement);

      string commentReplacement =
        codeStyle.Comment.BeginTag + "${comment}" + codeStyle.Comment.EndTag;
      return Regex.Replace(withKeywords, "(?<comment>//.*$)", commentReplacement);
    }
    // Highlights one line of JScript source. Works like the C# highlighter
    // but with the JScript keyword list: encode, colour keywords that are
    // not preceded by "//", then colour the trailing "//" comment.
    // Empty lines are returned unchanged.
    private string FixJSLine(string line)
    {
      if(line.Length == 0)
      {
        return line;
      }

      // Tabs become single spaces before the text is encoded.
      string text = Regex.Replace(line, @"(?i)(\t)", " ");
      text = HttpUtility.HtmlEncode(text);

      string[] keywords =
      {
        "private", "protected", "public", "namespace", "class",
        "var", "for", "if", "else", "while", "switch", "case",
        "using", "get", "return", "null", "void", "int", "string",
        "float", "this", "set", "new", "true", "false", "const",
        "static", "package", "function", "internal", "extends",
        "super", "import", "default", "break", "try", "catch", "finally"
      };
      string alternation = string.Join("|", keywords);

      FontStyle keywordFont = codeStyle.Keyword;
      text = Regex.Replace(
        text,
        @"\b" + "(?<keyword>" + alternation + ")" + @"\b(?<!//.*)",
        keywordFont.BeginTag + "${keyword}" + keywordFont.EndTag);

      FontStyle commentFont = codeStyle.Comment;
      text = Regex.Replace(
        text,
        "(?<comment>//.*$)",
        commentFont.BeginTag + "${comment}" + commentFont.EndTag);

      return text;
    }
    // Highlights one line of Visual Basic source: the line is HTML-encoded,
    // keywords (matched case-insensitively) that are not preceded by an
    // apostrophe on the line are wrapped in the keyword font, and an
    // apostrophe comment running to the end of the line is wrapped in the
    // comment font. Empty lines are returned unchanged.
    private string FixVBLine(string line)
    {
      string outLine = line;
      if(line.Length == 0)
      {
        return line;
      }
      outLine = Regex.Replace(line, @"(?i)(\t)", " ");
      outLine = HttpUtility.HtmlEncode(outLine);
      // Newer framework versions encode the apostrophe as "&#39;", which
      // hides the VB comment marker from both patterns below. Restoring the
      // literal character is safe inside HTML text content, and cannot touch
      // a "&#39;" typed in the source because its ampersand was encoded to
      // "&amp;" by the call above.
      outLine = outLine.Replace("&#39;", "'");
      string[] keywords =
      {
        "AddressOf", "Delegate", "Optional", "ByVal", "ByRef", "Decimal",
        "Boolean", "Option", "Compare", "Binary", "Text", "On", "Off",
        "Explicit", "Strict", "Private", "Protected", "Public", "End Namespace",
        "Namespace", "End Class", "Exit", "Class", "Goto", "Try", "Catch",
        "End Try", "For", "End If", "If", "Else", "ElseIf", "Next", "While",
        "And", "Do", "Loop", "Dim", "As", "End Select", "Select", "Case", "Or",
        "Imports", "Then", "Integer", "Long", "String", "Overloads", "True",
        "Overrides", "End Property", "End Sub", "End Function", "Sub", "Me",
        "Function", "End Get", "End Set", "Get", "Friend", "Inherits",
        "Implements", "Return", "Not", "New", "Shared", "Nothing", "Finally",
        "False", "Me", "My", "MyBase", "End Enum", "Enum" };
      string combinedKeywords = "(?<keyword>" + string.Join("|", keywords) + ")";
      // The look-behind rejects keywords that follow an apostrophe, i.e.
      // keywords inside a comment.
      outLine = Regex.Replace(outLine,
        @"(?i)\b" + combinedKeywords + @"\b(?<!'.*)", 
        codeStyle.Keyword.BeginTag + "${keyword}" + codeStyle.Keyword.EndTag);
      // The look-ahead skips an apostrophe that is followed by an encoded
      // double quote before the next apostrophe (treated as being inside a
      // string literal rather than starting a comment).
      outLine = Regex.Replace(outLine,
        "(?<comment>'(?![^']*&quot;).*$)", 
        codeStyle.Comment.BeginTag + "${comment}" + codeStyle.Comment.EndTag);
      return outLine;
    }
    // Highlights one line of ASP.NET page markup. The line is HTML-encoded
    // and then passed through a sequence of regular-expression replacements.
    // The order matters: every step runs on the output of the previous one,
    // so later patterns also see the font tags inserted earlier.
    // Empty lines are returned unchanged.
    private string FixASPXLine(string line)
    {
      string outLine = line;
      string searchExp = null;
      string replaceExp = null;
      if(line.Length == 0)
      {
        return line;
      }
      // Replace each tab character with the single-space literal below,
      // then HTML-encode the line.
      outLine = Regex.Replace(outLine, @"(?i)(\t)", " ");
      outLine = HttpUtility.HtmlEncode(outLine);
      // Single line comment or #include references: wrap the encoded
      // "<!-- ... -->" span in the comment font.
      searchExp = "(?i)(?<a>(^.*))(?<b>(&lt;!--))(?<c>(.*))(?<d>(--&gt;))(?<e>(.*))";
      replaceExp = "${a}" + codeStyle.Comment.BeginTag + 
        "${b}${c}${d}" + codeStyle.Comment.EndTag + "${e}";
      if(Regex.IsMatch(outLine, searchExp))
      {
        outLine = Regex.Replace(outLine, searchExp, replaceExp);
      }
      // Colorize <%@ <type>: the whole directive is set in bold blue.
      searchExp = "(?i)" + "(?<a>(&lt;%@))" + "(?<b>(.*))" + "(?<c>(%&gt;))";
      replaceExp = "<font color=blue><b>${a}${b}${c}</b></font>";
      if(Regex.IsMatch(outLine, searchExp))
      {
        outLine = Regex.Replace(outLine, searchExp, replaceExp);
      }
      // Colorize <%# <type>: only the text between the delimiters is set in
      // bold red.
      searchExp = "(?i)" + "(?<a>(&lt;%#))" + "(?<b>(.*))" + "(?<c>(%&gt;))";
      replaceExp = "${a}" + "<font color=red><b>" + "${b}" + "</b></font>" + "${c}";
      if(Regex.IsMatch(outLine, searchExp))
      {
        outLine = Regex.Replace(outLine, searchExp, replaceExp);
      }
      // Colorize tag <type>: the tag name gets the XmlTag font. The negative
      // look-aheads exclude "<%" blocks and the asp:/template/property/
      // ibuyspy: tags, which the next step handles.
      searchExp =
        "(?i)" + 
        "(?<a>(&lt;)(?!%)(?!/?asp:)(?!/?template)(?!/?property)(?!/?ibuyspy:)(/|!)?)" +
        @"(?<b>[^;\s&]+)" + @"(?<c>(\s|&gt;|\Z))";
      replaceExp = "${a}" + 
        codeStyle.XmlTag.BeginTag + "${b}" + codeStyle.XmlTag.EndTag + "${c}";
      if(Regex.IsMatch(outLine, searchExp))
      {
        outLine = Regex.Replace(outLine, searchExp, replaceExp);
      }
      // Colorize asp:|template for runat=server tags <type>: the tag text is
      // wrapped in the keyword font and set in bold.
      searchExp = "(?i)(?<a>&lt;/?)(?<b>(asp:|template|property|IBuySpy:).*)(?<c>&gt;)?";
      replaceExp = "${a}" + codeStyle.Keyword.BeginTag + "<b>${b}</b>" + 
        codeStyle.Keyword.EndTag + "${c}";
      if(Regex.IsMatch( outLine, searchExp))
      {
        outLine = Regex.Replace(outLine, searchExp, replaceExp);
      }
      // Colorize begin of tag char(s) "<","</","<%"
      searchExp = "(?i)(?<a>(&lt;)(/|!|%)?)";
      replaceExp = codeStyle.Keyword.BeginTag + "${a}" + codeStyle.Keyword.EndTag;
      if(Regex.IsMatch( outLine, searchExp))
      {
        outLine = Regex.Replace(outLine, searchExp, replaceExp);
      }
      // Colorize end of tag char(s) ">","/>"
      searchExp = "(?i)(?<a>(/|%)?(&gt;))";
      replaceExp = codeStyle.Keyword.BeginTag + "${a}" + codeStyle.Keyword.EndTag;
      if(Regex.IsMatch( outLine, searchExp))
      {
        outLine = Regex.Replace(outLine, searchExp, replaceExp);
      }
      return outLine;
    }
    // Reports whether the line opens a block of server-side code: either a
    // <script> tag carrying runat=server or a WebService directive.
    // line is the raw (not HTML-encoded) source line.
    private bool IsScriptBlockTagStart(string line)
    {
      // Matched case-insensitively and with optional spaces around '=', the
      // same way GetLanguageFromLine recognises the tag; the original
      // pattern missed upper-case tags such as <SCRIPT RUNAT="server">.
      if(Regex.IsMatch(line, @"(?i)<script.*runat\s*=\s*""?server""?.*>"))
      {
        return true;
      }
      return Regex.IsMatch(line, @"(?i)<%@\s*WebService");
    }
    // Reports whether the line contains a closing script tag.
    // line is the raw (not HTML-encoded) source line.
    private bool IsScriptBlockTagEnd(string line)
    {
      // Case-insensitive so that </SCRIPT> ends the block as well; the
      // original pattern only recognised the lower-case spelling.
      return Regex.IsMatch(line, "(?i)</script.*>");
    }
    // Reports whether the HTML-encoded form of the line starts an
    // asp:/template/property/IBuySpy: tag with no encoded ">" after the
    // tag's opening bracket.
    private bool IsMultiLineTagStart(string line)
    {
      string encoded = HttpUtility.HtmlEncode( line );
      return Regex.IsMatch(
        encoded,
        "(?i)(?!.*&gt;)(?<a>&lt;/?)(?<b>(asp:|template|property|IBuySpy:).*)");
    }
    // Reports whether the HTML-encoded form of the line contains an encoded
    // ">" character anywhere.
    private bool IsMultiLineTagEnd(string line)
    {
      string encoded = HttpUtility.HtmlEncode(line);
      return Regex.IsMatch( encoded, "(?i)&gt;");
    }
    // Language keys used throughout the converter. The values double as the
    // file extensions (without the dot) that select each language, and they
    // must remain compile-time constants because they are used as case
    // labels.
    private class ProgrammingLanguage
    {
      // Key for the C# highlighter.
      public const string CSharp = "cs";

      // Key for the JScript highlighter.
      public const string JSharp = "js";

      // Key for the Visual Basic highlighter.
      public const string VB = "vb";
    }
    // Convenience driver: re-tabs the input file into a working copy placed
    // in the output file's directory (keeping the input's file name, and so
    // its extension, for language detection), then renders that copy to
    // outputFilePath.
    //
    // inputFilePath:  source file to convert.
    // outputFilePath: HTML file to write.
    // tabSize:        spaces per tab, applied to the converter.
    // style:          styles to render with; null keeps the defaults.
    //
    // NOTE(review): the working copy is left on disk, and when the input
    // already lives in the output directory the working copy is the input
    // file itself, which is therefore overwritten with its re-tabbed text.
    public static void Test(string inputFilePath, string outputFilePath,
      int tabSize, CodeToHTML.CodeStyle style)
    {
      CodeToHTML converter = new CodeToHTML();
      converter.TabSize = tabSize;
      // A null style would only fail later, inside Render, with a
      // NullReferenceException; keep the default style instead.
      if(style != null)
      {
        converter.Style = style;
      }
      string tempFileName = new FileInfo(inputFilePath).Name;
      FileInfo outputInfo = new FileInfo(outputFilePath);
      // Build the path from the directory itself. Removing the file name
      // from the full path with String.Replace also removed every other
      // occurrence of that text, e.g. a directory with the same name.
      string tempFixedFilePath = Path.Combine(outputInfo.DirectoryName,
        tempFileName);
      converter.RetabAndTrimFile(inputFilePath, tempFixedFilePath);
      converter.RenderFile(tempFixedFilePath, outputFilePath);
    }
  }
}

posted on Monday, October 31, 2005 3:24 AM

Feedback

# re: Code-To-HTML converter 12/6/2005 7:00 AM Tim Haughton
Looks good. I stumbled across your page and this one...

http://www.jtleigh.com/people/colin/software/CopySourceAsHtml/

Before, I was pasting the code into Word, then saving as html and copying the markup. A little long winded.

# re: html-to-doc converter 2/4/2008 12:05 AM balaraju
hi sir,

please send me code for html to document.

Thanking You.

Post Feedback

Title:
Name:
Email: (never displayed)
Url:
Comments: