Please visit our sponsor
UNKNOWN !************************************** ! Name: Text Converter ! Description:Converts text from HTML or Ventura (another tag text format) to ASCII and from ASCII to 7bit chars. ! By: Dedux ! ! ! Inputs:Text in HTML, ventura or ASCII. ! ! Returns:the text after the convertion. ! !Assumes:(fixed a couple of incorrect symbol tags) They are shown in explorer but not in netscape. ! !Side Effects:This converter does not format tables, skip images and is a simple one made for personal purpouses. !This code is copyrighted and has limited warranties. !Please see http://www.Planet-Source-Code.com/xq/ASP/txtCodeId.236/lngWId.7/qx/vb/scripts/ShowCode.htm !for details. !************************************** unit TextConverterUnit; {ver 1.2} interface {Translate a string in Ventura format into HTML format.} function VenturaToHTML(const s:string):string; {HTLM to ASCII, very rudimentary} function HTMLtoASCII(const text:string):string; {all ascii chars are converter to the most similar 7 bit equivalent.} function ASCIIto7Bit(const s:string):string; {letters to uppercase, separators to space and insert space between numbers and letters} function uniformText(const text:string):string; {remove all tags enclosed by '&lt;' '&gt;' from the string.} function RemoveTags(const s :string):string; implementation uses SysUtils; const newLine = #13#10; CharSet1: array[32..222] of string = ( '&nbsp;', '!', '&quot;', { 032 - 034 } '#', '&curren;', '%', '&', '&sbquo;' ! , { 035 - 039 } '(', ')', '*', '+', ',', { 040 - ! 044 } '-', '.', '/', '0', '1', { 045 - ! 049 } ! '2', '3', '4', '5', '6',{ 050 - 054 } '7', '8', '9', ':', ';',{ 055 - 059 } '&lt;', '=', '&gt;', '?', '@', { 060 - 0 ! 64 } ! 'A', 'B', 'C', 'D', 'E',{ 065 - 069 } ! 'F', 'G', 'H', 'I', 'J',{ 070 - 074 } ! 'K', 'L', 'M', 'N', 'O',{ 075 - 079 } ! 'P', 'Q', 'R', 'S', 'T',{ 080 - 084 } ! 'U', 'V', 'W', 'X', 'Y',{ 085 - 089 } ! 'Z', '|', '\', '|', '^',{ 090 - 094 } '_', '&quot;', 'a', 'b', 'c',{ 095 - 099 ! } ! 
'd', 'e', 'f', 'g', 'h',{ 100 - 104 } ! 'i', 'j', 'k', 'l', 'm',{ 105 - 109 } ! 'n', 'o', 'p', 'q', 'r',{ 110 - 114 } ! 's', 't', 'u', 'v', 'w',{ 115 - 119 } ! 'x', 'y', 'z', '{', '|',{ 120 - 124 } '}', '&tilde;', '&nbsp;', '&Ccedil;', '& ! uuml;', { 125 - 129 } '&eacute;', '&acirc;', '&auml;', '&agrav; ! e;', '&aring;', { 130 - 134 } '&ccedil;', '&ecirc;', '&euml;', '&egrav; ! e;', '&iuml;', { 135 - 139 } '&icirc;', '&igrave;', '&Auml;', '&Aring; ! ;', '&Eacute;', { 140 - 145 } '&aelig;', '&AElig;', '&ouml;', '&ocirc; ! ', '&ograve;', { 146 - 149 } '&ucirc;', '&ugrave;', '&yuml;', '&Ouml; ! ', '&Uuml;', { 150 - 154 } '&cent;', '&pound;', '&yen;', '???', '&f; ! nof;', { 155 - 159 } '&aacute;', '&iacute;', '&oacute;', '&ua; ! cute;', '&ntilde;', { 160 - 164 } '&Ntilde;', '&ordf;', '&ordm;', '&iquest; ! ;', '&ldquo;', { 165 - 169 } '&rdquo;', '&lang;', '&rang;', '&iexcl;' ! , '&laquo;', { 170 - 174 } '&raquo;', '&atilde;', '&otilde;', '&Osl; ! ash;', '&oslash;', { 175 - 179 } '&oelig;', '&OElig;', '&Agrave;', '&Atil; ! de;', '&Otilde;', { 180 - 184 } '&sect;', '&#8225;', '&#8224;', '&para;' ! , '&copy;', { 185 - 189 } '&reg;', '&trade;', ',,', '...', '%0', ! { 190 - 194 } '&bull;', '&ndash;', '&mdash;', '&deg;', ! '&Aacute;', { 195 - 199 } '&Acirc;', '&Egrave;', '&Ecirc;', '&Euml; ! ;', '&Igrave;', { 200 - 204 } '&Iacute;', '&Icirc;', '&Iuml;', '&Ograv; ! e;', '&Oacute;', { 205 - 209 } '&Ocirc;', 'S', '&s;', '&Ugrave;', '&Uacu; ! te;', { 210 - 214 } '&Ucirc;', '&#159;', '&Beta;', { ! 215 - 217 } '&#218;', '&#219;', '&#220;', '&#221;', ! '&#222;' );{ 218 - 222 } CharSet2 : Array[32..217] of Char = ( ' ','!',#34,'#','$','%','&','''','(',')' ! ,'*','+',',','-','.','/','0','1','2','3' ! , '4','5','6','7','8','9',':',';','&lt;','=', ! '&gt;','?','@','A','B','C','D','E','F', 'G','H','I','J','K','L','M','N','O','P', ! 'Q','R','S','T','U','V','W','X','Y', 'Z','[','\',']','^','_','''','a','b','c' ! 
,'d','e','f','g','h','i','j','k','l', 'm','n','o','p','q','r','s','t','u','v', ! 'w','x','y','z','{','|','}','~',' ', 'Ç','¡','¢','£','¤','¥','¦','ç','§','¨', ! '©','«','¬','­','®','¯','°','±','²', '³','´','µ','¶','·','¸','¹','º','»','¼', ! '½','¾','¿','À','Á','Â','Ã','Ä','Å', 'Æ','º','¿','&#8220;','&#8221;','&#8216; ! ','’','¡','«','»','ã','õ','Ò','Ó', ! 'Ô','&#338;','À','Ã','Õ', '§','&#8225;','&#8224;','¶','©','®','&#8; ! 482;','&#8222;','&#8230;','â','ã','ä','& ! #8212;','°','À','Â','É','Ê','Ë', 'Ì','Í','Î','Ï','Ò','Ó','Ô','&#352;','&# ! 353;','Ù','Û','Ü','Ý','ß'); ANSIToHTMLMap : array['&#8482;'..'ÿ'] of string = ( '&trade;', '&#154;', '&#155;', '&#156;', '&#157;', '&#158;', '&#159;', '&nbsp;', '&iexcl;', '&cent;', '&pound;' ! , '&curren;', { 160 - 164 } '&yen;', '&brvbar;', '&sect;', '&uml;', ! '&copy;', { 165 - 169 } '&ordf;', '&laquo;', '&not;', '&shy;', ' ! &reg;', { 170 - 174 } '&macr;', '&deg;', '&plusmn;', '&sup2;', ! '&sup3;', { 175 - 179 } '&acute;', '&micro;', '&para;', '&middot; ! ;', '&cedil;', { 180 - 184 } '&sup1;', '&ordm;', '&raquo;', '&frac14; ! ', '&frac12;', { 185 - 189 } '&frac34;', '&iquest;', '&Agrave;', '&Aa; ! cute;', '&Acirc;',{ 190 - 194 } '&Atilde;', '&Auml;', '&Aring;', '&AElig; ! ;', '&Ccedil;', { 195 - 199 } '&Egrave;', '&Eacute;', '&Ecirc;', '&Eum; ! l;', '&Igrave;', { 200 - 204 } '&Iacute;', '&Icirc;', '&Iuml;', '&ETH;' ! , '&Ntilde;', { 205 - 209 } '&Ograve;', '&Oacute;', '&Ocirc;', '&Oti; ! lde;', '&Ouml;', { 210 - 214 } '&times;', '&Oslash;', '&Ugrave;', '&Uac; ! ute;', '&Ucirc;', { 215 - 219 } '&uml;', '&Yacute;', '&THORN;', '&szlig; ! ', '&agrave;', { 220 - 224 } '&aacute;', '&acirc;', '&atilde;', '&aum; ! l;', '&aring;', { 225 - 229 } '&aelig;', '&ccedil;', '&egrave;', '&eac; ! ute;', '&ecirc;', { 230 - 234 } '&euml;', '&igrave;', '&iacute;', '&icir; ! c;', '&iuml;', { 235 - 239 } '&eth;', '&ntilde;', '&ograve;', '&oacut; ! e;', '&ocirc;', { 240 - 244 } '&otilde;', '&ouml;', '&divide;', '&osla; ! 
sh;', '&ugrave;', { 245 - 249 } '&uacute;', '&ucirc;', '&uuml;', '&yacut; ! e;', '&thorn;', { 250 - 254 } '&yuml;' { 255 } ! ); ANSITo7Bit : array['&#8482;'..'ÿ'] of string = ( '(TM)', ' ', ' ', 'oe', '', 'z', 'Y', ' ', 'i', 'C', 'L', '$', { 160 - 16 ! 4 } 'Y', ' ', ' ', ' ', '(C)', { 165 - ! 169 } 'a','&lt;&lt;',' ', '-', '(R)', { 170 - 1 ! 74 } '-', '''', '+-', '2', '3', { 175 - ! 179 } 'a', 'u', ' ', '.', 'c;', { 180 - 184 ! } '1;', 'o', '&gt;&gt;', '1/4', '1/2', { 18 ! 5 - 189 } '3/4;', ' ', 'A', 'A', 'A', { 190 - ! 194 } 'A', 'A', 'A', 'AE', 'C', { 195 - ! 199 } 'E', 'E', 'E', 'E', 'I', { 200 - 20 ! 4 } 'I', 'I', 'I', 'D', 'N', { 205 - 20 ! 9 } 'O', 'O', 'O', 'O', 'O', { 210 - 214 ! } 'x', 'O', 'U', 'U', 'U', { 215 - 219 ! } 'U', 'Y', 'P', 'B', 'a', { 220 - 224 ! } 'a', 'a', 'a', 'a', 'a', { 225 - 229 ! } 'ae','c', 'e', 'e', 'e', { 230 - 234 ! } 'e', 'i', 'i', 'i', 'i', { 235 - 239 ! } 'o', 'n', 'o', 'o', 'o', { 240 - 244 ! } 'o', 'o', '%', 'o', 'u', { 245 - 249 ! } 'u', 'u', 'u', 'y', 'p', { 250 - 254 ! } ! 
'y' { 255 } );

const
  { Opening font tags emitted by VenturaToHTML.
    NOTE(review): the text font literal was malformed in the original
    (quote in the wrong place); restored on the assumption that it read
    "Verdana, Times New Roman". The attribute name is kept as found. }
  TextFontTag   = '<font name="Verdana, Times New Roman">';
  SymbolFontTag = '<font name="Symbol">';

  { Upper bound on the length of an entity name collected by HTMLtoASCII.
    A longer run after '&' is treated as ordinary text. }
  MaxEntityLen = 12;

{ True for the unaccented Latin letters and for n-tilde in either case.
  The two accented letters are given by character code (241 and 209, the
  positions documented in ANSIToHTMLMap) so the set does not depend on the
  encoding of this source file. }
function isAlpha(const c: char): boolean;
begin
  Result := c in ['a'..'z', 'A'..'Z', #241, #209];
end;

{ True for the decimal digits 0 to 9. }
function isNumeric(const c: char): boolean;
begin
  Result := c in ['0'..'9'];
end;

{ True when c is accepted by isAlpha or by isNumeric. }
function isAlphaNumeric(const c: char): boolean;
begin
  Result := isAlpha(c) or isNumeric(c);
end;

{ Removes every tag enclosed by '<' and '>' from s.
  Carriage returns and line feeds outside tags are dropped.
  A closing TD is replaced by a space. A closing P, TR, OL, UL, a BR, an LI
  and any closing tag whose name starts with H are replaced by a line break.
  Tags are compared case-insensitively and must carry no attributes to be
  recognised for replacement; all other tags simply disappear. }
function RemoveTags(const s: string): string;
var
  i: Integer;
  InTag: Boolean;
  Tag: string;
begin
  Result := '';
  InTag := False;
  Tag := '';
  for i := 1 to Length(s) do
  begin
    if s[i] = '<' then
      InTag := True;

    if InTag then
      Tag := Tag + s[i]
    else if not (s[i] in [#10, #13]) then
      Result := Result + s[i];

    if s[i] = '>' then
    begin
      { The literals below are lower case, so the tag is folded to lower
        case. The original folded to upper case and therefore never
        matched any of them. }
      Tag := LowerCase(Tag);
      if Tag = '</td>' then
        Result := Result + ' ';
      if (Tag = '</p>') or (Tag = '<br>') or (Tag = '</tr>') or
         (Tag = '<li>') or (Tag = '</ol>') or (Tag = '</ul>') or
         (Copy(Tag, 1, 3) = '</h') then
        Result := Result + newLine;
      Tag := '';
      InTag := False;
    end;
  end;
end;

{ Returns the run of decimal digits that starts at s[Index] and moves Index
  to the first position after that run. Returns an empty string, and leaves
  Index unchanged, when s[Index] is not a digit or Index is past the end. }
function TakeDigits(const s: string; var Index: Integer): string;
begin
  Result := '';
  while (Index <= Length(s)) and (s[Index] in ['0'..'9']) do
  begin
    Result := Result + s[Index];
    Inc(Index);
  end;
end;

{ HTML text for a character code of the symbol character set (codes 32 to 86
  are mapped). Returns an empty string for any other code. }
function SymbolCodeToHTML(const Code: Integer): string;
begin
  case Code of
    32: Result := '&nbsp;';
    33: Result := '&iexcl;';
    34: Result := '&#34;';   { Should be inverted. }
    35: Result := '&#35;';
    36: Result := '&#36;';   { Should be inverted. }
    37: Result := '&#37;';
    38: Result := '&amp;';   { a bare ampersand is not valid HTML text }
    39: Result := '&#39;';
    40: Result := '&#40;';
    41: Result := '&#41;';
    42: Result := '&#42;';
    43: Result := '&#43;';
    44: Result := '&#44;';
    45: Result := '&mdash;';
    46..57: Result := Char(Code);
    58: Result := '&#58;';
    59: Result := '&#59;';
    60: Result := '&lt;';
    61: Result := '=';
    62: Result := '&gt;';
    63: Result := '?';
    64: Result := '&#64;';
    65, 66: Result := Char(Code);
    67: Result := '&Chi;';
    68: Result := '&Delta;';
    69: Result := 'E';
    70: Result := '&Phi;';   { was a second Iota; F is Phi in the Symbol layout }
    71: Result := '&Gamma;';
    72: Result := '&Eta;';
    73: Result := '&Iota;';
    74: Result := '&#74;';
    75: Result := '&Kappa;';
    76: Result := '&Lambda;';
    77: Result := '&Mu;';
    78: Result := '&Nu;';
    79: Result := '&Omicron;';
    80: Result := '&Pi;';
    81: Result := '&Theta;';
    82: Result := '&Rho;';
    83: Result := '&Sigma;';
    84: Result := '&Tau;';
    85: Result := '&Upsilon;';
    86: Result := '&#86;';
  else
    Result := '';
  end;
end;

{ Translates a string in Ventura tagged-text format into HTML.

  Outside tags, characters covered by ANSIToHTMLMap become entities, a
  carriage return becomes a P tag followed by the carriage return, and
  everything else is copied. Inside '<' ... '>' the tag letters are
  interpreted one by one (bold, italic, sub/superscript, font and size
  changes, fractions, numeric character codes). Closing tags for everything
  still open are accumulated in Normalize and emitted by a D tag or at the
  end of the text. Processing stops at a HIDE tag. }
function VenturaToHTML(const s: string): string;
var
  i, L, Code, CharSet: Integer;
  InTag: Boolean;
  Normalize, Number: string;
begin
  Result := '';
  Normalize := '';
  InTag := False;
  CharSet := 1;
  L := Length(s);
  i := 1;
  while i <= L do
  begin
    if s[i] = '<' then
      InTag := True
    else if s[i] = '>' then
    begin
      InTag := False;
      { keep a digit that follows a tag apart from the preceding text }
      if i < L then
        if s[i + 1] in ['0'..'9'] then
          Result := Result + ' ';
    end
    else if not InTag then
    begin
      if (s[i] >= Low(ANSIToHTMLMap)) and (s[i] <= High(ANSIToHTMLMap)) then
        Result := Result + ANSIToHTMLMap[s[i]]
      else if s[i] = #13 then
        Result := Result + '<p>' + #13
      else
        Result := Result + s[i];
    end
    else
      case s[i] of
        'S', 'T':
          ; { S is ignored; T (hyperlink) is not processed yet }

        'b', 'B', 'x', 'X':
          begin
            Result := Result + '<b>';
            Normalize := '</b>' + Normalize;
          end;

        'i', 'I':
          begin
            Result := Result + '<i>';
            Normalize := '</i>' + Normalize;
          end;

        'd', 'D':
          begin
            { D closes everything that is open; DJ255 is consumed whole }
            Number := Copy(s, i, 5);
            if Number = 'DJ255' then
              Inc(i, 4);
            Result := Result + Normalize;
            Normalize := '';
          end;

        'm', 'M':
          begin
            Number := Copy(s, i, 7);
            if Number = 'MSJ243>' then
            begin
              { stop on the last digit so the loop increment lands on '>' }
              Inc(i, 5);
              Result := Result + '<sup>';
              Normalize := '</sup>' + Normalize;
            end;
          end;

        'v', 'V':
          begin
            Result := Result + '<sub>';
            Normalize := '</sub>' + Normalize;
          end;

        '^':
          begin
            Result := Result + '<sup>';
            Normalize := '</sup>' + Normalize;
          end;

        'R':
          Result := Result + '<p>';

        'J':
          begin
            { Consume the digits of the tag. The original left them in
              place, so they were emitted afterwards as a character code. }
            Inc(i);
            Number := TakeDigits(s, i);
            Dec(i);
            if Number = '243' then
            begin
              Result := Result + '<sup>';
              Normalize := '</sup>' + Normalize;
            end
            else
            begin
              Result := Result + Normalize;
              if CharSet = 1 then
                Result := Result + TextFontTag
              else
                Result := Result + SymbolFontTag;
              Normalize := '</font>';
            end;
          end;

        'F':
          begin
            Inc(i);
            Number := TakeDigits(s, i);
            Dec(i);
            if Number = '128' then
            begin
              CharSet := 2;
              Result := Result + SymbolFontTag;
            end
            else
            begin
              CharSet := 1;
              Result := Result + TextFontTag;
            end;
            Normalize := '</font>' + Normalize;
          end;

        'P':
          begin
            { The original emitted the Pascal expression as literal text and
              included the letter P in the number. }
            Inc(i);
            Number := TakeDigits(s, i);
            Dec(i);
            Result := Result + '<font size="' + Number + '">';
            Normalize := '</font>' + Normalize;
          end;

        'H':
          begin
            { NOTE(review): three characters are skipped even when the tag
              is not HIDE; kept as found, confirm against real input. }
            Number := Copy(s, i, 4);
            Inc(i, 3);
            if Number = 'HIDE' then
              Break;
          end;

        '$':
          begin
            Inc(i);
            Number := Copy(s, i, 4);
            if Number = 'E1/2' then
              Result := Result + '&frac12;'
            else if Number = 'E1/4' then
              Result := Result + '&frac14;'
            else if Number = 'E3/4' then
              Result := Result + '&frac34;';
            Inc(i, 3);
          end;

        '0'..'9':
          begin
            { numeric character code; the scan is bounded by the string end }
            Number := TakeDigits(s, i);
            Dec(i);
            Code := StrToInt(Number);
            if CharSet = 2 then
              Result := Result + SymbolCodeToHTML(Code)
            else if (Code >= 33) and (Code <= 222) then
              Result := Result + CharSet1[Code]
            else
              Result := Result + Char(Code);
          end;
      end;
    Inc(i);
  end;
  Result := Result + Normalize;
end;

{ Gives the character for an HTML entity written in full, including the
  leading '&' and the trailing ';'.
  Entities listed in ANSIToHTMLMap are looked up there first. The four
  markup entities (amp, lt, gt, quot) and numeric references for codes 32 to
  255 are decoded as well. Anything else yields a single space. }
function HTMLSymbolToASCII(const s: string): string;
var
  c: char;
  Code: Integer;
begin
  for c := Low(ANSIToHTMLMap) to High(ANSIToHTMLMap) do
    if CompareStr(ANSIToHTMLMap[c], s) = 0 then
    begin
      Result := c;
      Exit;
    end;

  Result := ' ';
  if s = '&amp;' then
    Result := '&'
  else if s = '&lt;' then
    Result := '<'
  else if s = '&gt;' then
    Result := '>'
  else if s = '&quot;' then
    Result := '"'
  else if (Length(s) > 3) and (s[2] = '#') then
  begin
    { text between the hash sign and the final semicolon }
    Code := StrToIntDef(Copy(s, 3, Length(s) - 3), -1);
    if (Code >= 32) and (Code <= 255) then
      Result := Char(Code);
  end;
end;

{ HTML to ASCII, very rudimentary: tags are removed with RemoveTags, then
  every entity of the form '&name;' is replaced by HTMLSymbolToASCII.
  An ampersand that does not start an entity (followed by something other
  than letters, digits or '#', or by more than MaxEntityLen characters
  without a semicolon) is copied through unchanged, as is a semicolon that
  does not close an entity. }
function HTMLtoASCII(const text: string): string;
var
  i: Integer;
  InEntity: Boolean;
  s, Entity: string;
begin
  s := RemoveTags(text);
  Result := '';
  InEntity := False;
  Entity := '';
  for i := 1 to Length(s) do
  begin
    if s[i] = '&' then
    begin
      { a second ampersand means the pending text was not an entity }
      if InEntity then
        Result := Result + Entity;
      InEntity := True;
      Entity := '&';
    end
    else if not InEntity then
      Result := Result + s[i]
    else if s[i] = ';' then
    begin
      Result := Result + HTMLSymbolToASCII(Entity + ';');
      InEntity := False;
      Entity := '';
    end
    else if (s[i] in ['A'..'Z', 'a'..'z', '0'..'9', '#']) and
            (Length(Entity) < MaxEntityLen) then
      Entity := Entity + s[i]
    else
    begin
      { not an entity after all: give the collected text back }
      Result := Result + Entity + s[i];
      InEntity := False;
      Entity := '';
    end;
  end;
  { unterminated entity at the end of the text }
  if InEntity then
    Result := Result + Entity;
end;

{ Replaces every character covered by the ANSITo7Bit table with its closest
  7 bit equivalent and copies all other characters unchanged. The first
  table entry is included; the original comparison skipped it. }
function ASCIIto7Bit(const s: string): string;
var
  i: Integer;
begin
  Result := '';
  for i := 1 to Length(s) do
    if (s[i] >= Low(ANSITo7Bit)) and (s[i] <= High(ANSITo7Bit)) then
      Result := Result + ANSITo7Bit[s[i]]
    else
      Result := Result + s[i];
end;

{ Letters to upper case, every non-alphanumeric character to a space, and a
  space inserted wherever a letter meets a digit. }
function uniformText(const text: string): string;
var
  i: Integer;
begin
  Result := '';
  for i := 1 to Length(text) do
  begin
    { The bounds test guards both alternatives; without the outer
      parentheses text[0] was read on the first iteration. }
    if (i > 1) and
       ((isAlpha(text[i - 1]) and isNumeric(text[i])) or
        (isNumeric(text[i - 1]) and isAlpha(text[i]))) then
      Result := Result + ' ';

    if isAlphaNumeric(text[i]) then
      Result := Result + AnsiUpperCase(text[i])
    else
      Result := Result + ' ';
  end;
end;

end.