rtf2html-2.1

By | 2002-06-01

This code recieves RTF code as output by a Rich Text Box in VB or MS Word. It outputs the equivalent in HTML. It’s in a somewhat BETA form in that it handles a number of but not all of the possible codes. If you encounter a code it doesn’t properly convert just send it to me and I’ll try to fix the function within 24 hours. I think it does a better job on uncomplicated text than MS Word’s HTML conversion.

Original Author: Brady Hegberg

Inputs

String containing rich text to convert. Note: Currently the input must include the Rich-text header codes otherwise the function will return an empty string.

Assumptions

This function may get updated fairly regularly for awhile. Please download the file at the URL below for the latest version:
rtf2html.zip
Here’s an example of how to use the function with a rich text box (Note that the function also be used with rich text files.)
TextBoxHTML.Text = (RTF2HTML(TextBoxRTF.TextRTF))

Returns

String containing HTML code.

API Declarations

None

Code

Function RTF2HTML(strRTF As String) As String
  'Version 2.1 (3/30/99)
  
  'The most current version of this function is available at
  'http://www2.bitstream.net/~bradyh/downloads/rtf2html.zip
  
  'Converts Rich Text encoded text to HTML format
  'if you find some text that this function doesn't
  'convert properly please email the text to
  'bradyh@bitstream.net
  Dim strHTML As String
  Dim l As Long
  Dim lTmp As Long
  Dim lRTFLen As Long
  Dim lBOS As Long         'beginning of section
  Dim lEOS As Long         'end of section
  Dim strTmp As String
  Dim strTmp2 As String
  Dim strEOS            'string to be added to end of section
  Const gHellFrozenOver = False  'always false
  Dim gSkip As Boolean       'skip to next word/command
  Dim strCodes As String      'codes for ascii to HTML char conversion
  
  strCodes = "  {00}© {a9}´ {b4}« {ab}» {bb}¡ {a1}¿{bf}À{c0}à{e0}Á{c1}"
  strCodes = strCodes & "á{e1}Â {c2}â {e2}Ã{c3}ã{e3}Ä {c4}ä {e4}Å {c5}å {e5}Æ {c6}"
  strCodes = strCodes & "æ {e6}Ç{c7}ç{e7}Р {d0}ð  {f0}È{c8}è{e8}É{c9}é{e9}Ê {ca}"
  strCodes = strCodes & "ê {ea}Ë {cb}ë {eb}Ì{cc}ì{ec}Í{cd}í{ed}Î {ce}î {ee}Ï {cf}"
  strCodes = strCodes & "ï {ef}Ñ{d1}ñ{f1}Ò{d2}ò{f2}Ó{d3}ó{f3}Ô {d4}ô {f4}Õ{d5}"
  strCodes = strCodes & "õ{f5}Ö {d6}ö {f6}Ø{d8}ø{f8}Ù{d9}ù{f9}Ú{da}ú{fa}Û {db}"
  strCodes = strCodes & "û {fb}Ü {dc}ü {fc}Ý{dd}ý{fd}ÿ {ff}Þ {de}þ {fe}ß {df}§ {a7}"
  strCodes = strCodes & "¶ {b6}µ {b5}¦{a6}±{b1}·{b7}¨  {a8}¸ {b8}ª {aa}º {ba}¬  {ac}"
  strCodes = strCodes & "­  {ad}¯ {af}°  {b0}¹ {b9}² {b2}³ {b3}¼{bc}½{bd}¾{be}× {d7}"
  strCodes = strCodes & "÷{f7}¢ {a2}£ {a3}¤{a4}¥  {a5}"
  strHTML = ""
  lRTFLen = Len(strRTF)
  'seek first line with text on it
  lBOS = InStr(strRTF, vbCrLf & "deflang")
  If lBOS = 0 Then GoTo finally Else lBOS = lBOS + 2
  lEOS = InStr(lBOS, strRTF, vbCrLf & "par")
  If lEOS = 0 Then GoTo finally
  While Not gHellFrozenOver
    strTmp = Mid(strRTF, lBOS, lEOS - lBOS)
    l = lBOS
    While l <= lEOS
      strTmp = Mid(strRTF, l, 1)
      Select Case strTmp
      Case "{"
        l = l + 1
      Case "}"
        strHTML = strHTML & strEOS
        l = l + 1
      Case ""  'special code
        l = l + 1
        strTmp = Mid(strRTF, l, 1)
        Select Case strTmp
        Case "b"
          If ((Mid(strRTF, l + 1, 1) = " ") Or (Mid(strRTF, l + 1, 1) = "")) Then
            strHTML = strHTML & ""
            strEOS = "
" & strEOS
            If (Mid(strRTF, l + 1, 1) = " ") Then l = l + 1
          ElseIf (Mid(strRTF, l, 7) = "bullet ") Then
            strHTML = strHTML & "ÔÇó"  'bullet
            l = l + 6
          Else
            gSkip = True
          End If
        Case "e"
          If (Mid(strRTF, l, 7) = "emdash ") Then
            strHTML = strHTML & "ÔÇö"
            l = l + 6
          Else
            gSkip = True
          End If
        Case "i"
          If ((Mid(strRTF, l + 1, 1) = " ") Or (Mid(strRTF, l + 1, 1) = "")) Then
            strHTML = strHTML & ""
            strEOS = "
" & strEOS
            If (Mid(strRTF, l + 1, 1) = " ") Then l = l + 1
          Else
            gSkip = True
          End If
        Case "l"
          If (Mid(strRTF, l, 10) = "ldblquote ") Then
            strHTML = strHTML & "ÔÇ£"
            l = l + 9
          ElseIf (Mid(strRTF, l, 7) = "lquote ") Then
            strHTML = strHTML & "ÔÇÿ"
            l = l + 6
          Else
            gSkip = True
          End If
        Case "p"
          If ((Mid(strRTF, l, 6) = "plain") Or (Mid(strRTF, l, 6) = "plain ")) Then
            strHTML = strHTML & strEOS
            strEOS = ""
            If Mid(strRTF, l + 5, 1) = "" Then l = l + 4 Else l = l + 5  'catch next but skip a space
          Else
            gSkip = True
          End If
        Case "r"
          If (Mid(strRTF, l, 7) = "rquote ") Then
            strHTML = strHTML & "'"
            l = l + 6
          ElseIf (Mid(strRTF, l, 10) = "rdblquote ") Then
            strHTML = strHTML & "ÔÇØ"
            l = l + 9
          Else
            gSkip = True
          End If
        Case "t"
          If (Mid(strRTF, l, 4) = "tab ") Then
            strHTML = strHTML & Chr$(9)  'tab
            l = l + 3
          Else
            gSkip = True
          End If
        Case "'"
          strTmp2 = "{" & Mid(strRTF, l + 1, 2) & "}"
          lTmp = InStr(strCodes, strTmp2)
          If lTmp = 0 Then
            strHTML = strHTML & Chr("&H" & Mid(strTmp2, 2, 2))
          Else
            strHTML = strHTML & Trim(Mid(strCodes, lTmp - 8, 8))
          End If
          l = l + 2
        Case "~"
          strHTML = strHTML & " "
        Case "{", "}", ""
          strHTML = strHTML & strTmp
        Case vbLf, vbCr, vbCrLf  'always use vbCrLf
          strHTML = strHTML & vbCrLf
        Case Else
          gSkip = True
        End Select
        If gSkip = True Then
          'skip everything up until the next space or ""
          While ((Mid(strRTF, l, 1) <> " ") And (Mid(strRTF, l, 1) <> ""))
            l = l + 1
          Wend
          gSkip = False
          If (Mid(strRTF, l, 1) = "") Then l = l - 1
        End If
        l = l + 1
      Case vbLf, vbCr, vbCrLf
        l = l + 1
      Case Else
        strHTML = strHTML & strTmp
        l = l + 1
      End Select
    Wend
        
    lBOS = lEOS + 2
    lEOS = InStr(lEOS + 1, strRTF, vbCrLf & "par")
    If lEOS = 0 Then GoTo finally
    
    strHTML = strHTML & "
"
  Wend
  
finally:
  RTF2HTML = strHTML
End Function

Author: dwirch

Derek Wirch is a seasoned IT professional with an impressive career dating back to 1986. He brings a wealth of knowledge and hands-on experience that is invaluable to those embarking on their journey in the tech industry.

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.