package iiuf.util;

import java.util.Vector;

/**
   Unicode to TeX translator. Needs package textcomp for certain translations.

   The translation tables below are flat arrays of pairs: the element at an
   even index is the Unicode side, the element that follows it is the TeX
   side.

   (c) 1999, 2000, 2001, IIUF, DIUF<p>

   @author $Author: ohitz $
   @version $Revision: 1.1 $
*/
public class UTTeX
  extends
  UnicodeTranslator
  implements
  Unicode
{

  /**
     Pairs that are part of the Unicode to TeX tables but are left out of
     the table used by {@link #getUnicode(String)}.
  */
  private final static String[] TEX2UNICODE_EXCL = {
    shy    , "-",  // soft hyphen
    ldquo  , "``", // LEFT DOUBLE QUOTATION MARK
    rdquo  , "''", // RIGHT DOUBLE QUOTATION MARK
    rsquor , "`",  // LEFT SINGLE QUOTATION MARK
    rsquo  , "'",  // RIGHT SINGLE QUOTATION MARK
    filig  , "fi", // LATIN SMALL LIGATURE FI
    fllig  , "fl", // LATIN SMALL LIGATURE FL
  };

  /**
     Pairs that are additionally run through {@link #mathExpand(String[])}
     and {@link #expand(String[])} to produce alternative TeX spellings.
     A Unicode value may appear more than once when several TeX spellings
     are listed for it.
  */
  private final static String[] UNICODE2TEX_EXP = {
    nbsp   , "~",                     // non breaking space
    iexcl  , "!'",                    // inverted exclamation mark
    cent   , "\\textcent{}",          // cent sign
    pound  , "\\pounds{}",            // pound sign
    curren , "\\textcurrency{}",      // currency sign
    yen    , "\\textyen{}",           // yen sign
    brvbar , "\\textbrokenbar{}",     // broken bar
    sect   , "\\S{}",                 // section sign
    Dot    , "\\textasciidieresis{}", // diaeresis
    copy   , "\\copyright{}",         // copyright sign
    ordf   , "\\textordfeminine{}",   // feminine ordinal indicator
    Lt     , "\\guillemotleft{}",     // left pointing double angle quotation mark
    not    , "\\textlnot{}",          // not sign
    reg    , "\\textregistered{}",    // registered sign
    // NOTE(review): textcomp documents "textasciimacron"; confirm that
    // "textmacron" is really defined before relying on this entry.
    macr   , "\\textmacron",          // macron
    deg    , "$^\\circ$",             // degree sign
    plusmn , "$^\\pm$",               // plus minus sign
    sup2   , "\\texttwosuperior",     // superscript two
    sup3   , "\\textthreesuperior",   // superscript three
    acute  , "\\textasciiacute",      // acute accent
    mcro   , "\\textmu",              // micro sign
    para   , "\\P{}",                 // pilcrow sign
    middot , "\\textperiodcentered",  // middle dot
    cedil  , "\\c{}",                 // cedilla
    sup1   , "\\textonesuperior",     // superscript one
    ordm   , "\\textordmasculine",    // masculine ordinal indicator
    Gt     , "\\guillemotright",      // right pointing double angle quotation mark
    frac14 , "${}^1\\!/\\!_4$",       // vulgar fraction one quarter
    frac12 , "${}^1\\!/\\!_2$",       // vulgar fraction one half
    frac34 , "${}^3\\!/\\!_4$",       // vulgar fraction three quarter
    iquest , "?'",                    // inverted question mark
    Agrave , "\\`A",
    Aacute , "\\'A",
    Acirc  , "\\^A",
    Atilde , "\\~A",
    Auml   , "\\\"A",
    Aring  , "\\AA{}",
    AElig  , "\\AE{}",
    Ccedil , "\\c{C}",
    Egrave , "\\`E",
    Eacute , "\\'E",
    Ecirc  , "\\^E",
    Euml   , "\\\"E",
    Igrave , "\\`I",
    Iacute , "\\'I",
    Icirci , "\\^I",
    Iuml   , "\\\"I",
    ETH    , "\\DH{}",                // latin capital letter ETH
    Ntilde , "\\~N",
    Ograve , "\\`O",
    Oacute , "\\'O",
    Ocirc  , "\\^O",
    Otilde , "\\~O",
    Ouml   , "\\\"O",
    times  , "\\texttimes{}",         // multiplication sign
    Ostrok , "\\O{}",
    Ugrave , "\\`U",
    Uacute , "\\'U",
    Ucircr , "\\^U",
    Uuml   , "\\\"U",
    Yacute , "\\'Y",
    THORN  , "\\TH{}",                // latin capital letter thorn
    szlig  , "\\ss",
    szlig  , "\"s",
    agrave , "\\`a",
    aacute , "\\'a",
    acirc  , "\\^a",
    atilde , "\\~a",
    auml   , "\\\"a",
    aring  , "\\aa{}",
    aelig  , "\\ae{}",
    ccedil , "\\c{c}",
    egrave , "\\`e",
    eacute , "\\'e",
    ecirc  , "\\^e",
    euml   , "\\\"e",
    igrave , "\\`i",
    iacute , "\\'i",
    icirc  , "\\^i",
    iuml   , "\\\"i",
    igrave , "\\`{\\i}",
    iacute , "\\'{\\i}",
    icirc  , "\\^{\\i}",
    iuml   , "\\\"{\\i}",
    eth    , "\\dh{}",                // latin small letter eth
    ntilde , "\\~n",
    ograve , "\\`o",
    oacute , "\\'o",
    ocirc  , "\\^o",
    otilde , "\\~o",
    ouml   , "\\\"o",
    divide , "\\textdiv",             // division sign
    ostrok , "\\o{}",
    ugrave , "\\`u",
    ucute  , "\\'u",
    ucircr , "\\^u",
    uuml   , "\\\"u",
    yacute , "\\'y",
    thorn  , "\\th{}",                // latin small letter thorn
    yuml   , "\\\"y",
    inodot , "\\i",                   // LATIN SMALL LETTER DOTLESS I
    OElig  , "\\OE{}",                // LATIN CAPITAL LIGATURE OE
    oelig  , "\\oe{}",                // LATIN SMALL LIGATURE OE
    Yuml   , "\\\"Y",                 // LATIN CAPITAL LETTER Y WITH DIAERESIS
    fnof   , "(fnof:not translated)", // LATIN SMALL LETTER F WITH HOOK
    // The circumflex accent; the cedilla accent belongs to the cedil entry.
    circ   , "\\^{}",                 // MODIFIER LETTER CIRCUMFLEX ACCENT
    caron  , "\\v{}",                 // CARON
    breve  , "\\u{}",                 // BREVE
    dot    , "\\.{}",                 // DOT ABOVE
    ring   , "(ring:not translated)", // RING ABOVE
    ogon   , "(ogon:not translated)", // OGONEK
    tilde  , "\\~{}",                 // SMALL TILDE
    dblac  , "\\H{}",                 // DOUBLE ACUTE ACCENT
    OHgr   , "$\\Omega$",             // GREEK CAPITAL LETTER OMEGA
    b_pi   , "$\\pi$",                // GREEK SMALL LETTER PI
    mdash  , "---",                   // EM DASH
    ndash  , "--",                    // EN DASH
    lsquor , "\\glq",                 // SINGLE LOW-9 QUOTATION MARK
    bdquo  , "\\glqq",                // DOUBLE LOW-9 QUOTATION MARK
    dagger , "\\dag",                 // DAGGER
    Dagger , "\\ddag",                // DOUBLE DAGGER
    bull   , "$\\bullet$",            // BULLET
    hellip , "\\mbox{...}",           // HORIZONTAL ELLIPSIS
    permil , "\\textperthousand",     // PER MILLE SIGN
    lsaquo , "\\flq",                 // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    rsaquo , "\\frq",                 // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    frasl  , "$/$",                   // FRACTION SLASH
    euro   , "\\texteuro",            // EURO SIGN
    // NOTE(review): the parenthesised form below is not a braced TeX
    // superscript; presumably kept as an accepted input spelling - confirm.
    trade  , "$^(TM)$",               // TRADE MARK SIGN
    trade  , "$^{TM}$",               // TRADE MARK SIGN
    part   , "$\\partial$",           // PARTIAL DIFFERENTIAL
    Delta  , "$\\Delta$",             // capital greek delta
    prod   , "$\\prod$",              // N-ARY PRODUCT
    sum    , "$\\sum$",               // N-ARY SUMMATION
    radic  , "$\\surd$",              // SQUARE ROOT
    infin  , "$\\infty$",             // INFINITY
    int_   , "$\\int$",               // INTEGRAL
    thkap  , "$\\approx$",            // ALMOST EQUAL TO
    ne     , "$\\not=$",              // NOT EQUAL TO
    le     , "$\\leq$",               // LESS-THAN OR EQUAL TO
    ge     , "$\\geq$",               // GREATER-THAN OR EQUAL TO
    loz    , "$\\diamondsuit$",       // LOZENGE
    _APPLE_LOGO , "(APPLE_LOGO:not translated)", // Apple logo
  };

  /**
     Pairs for characters that are special to TeX. These are used as they
     are, without generating alternative spellings.
  */
  private final static String[] UNICODE2TEX_NEXP = {
    "~"    , "$\\sim$",
    "~"    , "{\\verb\"~\"}",
    "#"    , "\\#",
    "$"    , "\\$",
    "%"    , "\\%",
    "_"    , "\\_",
    "{"    , "\\{",
    "}"    , "\\}",
    "&"    , "\\&",
  };

  /** Unexpanded table handed to the superclass constructor. */
  private final static String[] _UNICODE2TEX =
    Strings.arraycat(TEX2UNICODE_EXCL,
                     Strings.arraycat(UNICODE2TEX_NEXP, UNICODE2TEX_EXP));

  /** Full table (including expanded spellings) used by {@link #getNative(String)}. */
  static String[] UNICODE2TEX =
    Strings.arraycat(TEX2UNICODE_EXCL,
                     Strings.arraycat(UNICODE2TEX_NEXP,
                                      Strings.arraycat(mathExpand(UNICODE2TEX_EXP),
                                                       expand(UNICODE2TEX_EXP))));

  /**
     Table used by {@link #getUnicode(String)}: same as {@link #UNICODE2TEX}
     but without the {@link #TEX2UNICODE_EXCL} pairs.
  */
  private static String[] UNICODE2TEX2 =
    Strings.arraycat(UNICODE2TEX_NEXP,
                     Strings.arraycat(mathExpand(UNICODE2TEX_EXP),
                                      expand(UNICODE2TEX_EXP)));

  /** The default translator instance. */
  public static UnicodeTranslator trans = new UTTeX();

  /** Creates the translator on top of the unexpanded table. */
  private UTTeX() {
    super(_UNICODE2TEX);
  }

  /**
     Adds an <code>ensuremath</code> spelling for every inline math entry.

     For each pair whose TeX side is enclosed in dollar signs, the result
     contains first a pair with the TeX side rewritten as
     <code>ensuremath{...}</code> and then the original pair. All other
     pairs are copied unchanged. The order of the source pairs is kept.

     @param source flat array of (Unicode, TeX) pairs.
     @return a new flat array of pairs.
  */
  private static String[] mathExpand(String[] source) {
    Vector resultv = new Vector();
    for(int i = 1; i < source.length; i += 2) {
      String unicode = source[i - 1];
      String tex     = source[i];
      if(isInlineMath(tex)) {
        resultv.addElement(unicode);
        resultv.addElement("\\ensuremath{" + tex.substring(1, tex.length() - 1) + "}");
      }
      resultv.addElement(unicode);
      resultv.addElement(tex);
    }
    String[] result = new String[resultv.size()];
    resultv.copyInto(result);
    return result;
  }

  /**
     Tells whether a TeX string is enclosed in a pair of dollar signs.
     Requiring both delimiters keeps the escaped dollar sign out and makes
     stripping the first and the last character safe.

     @param tex the TeX string to check.
     @return true if the string starts and ends with a distinct dollar sign.
  */
  private static boolean isInlineMath(String tex) {
    return tex.length() >= 2 && tex.startsWith("$") && tex.endsWith("$");
  }

  /**
     Builds alternative spellings where the last character of the TeX side
     is wrapped in braces, e.g. an accent followed by a letter becomes the
     accent followed by the braced letter.

     TeX sides that end with a closing brace or a dollar sign, or that are
     at most one character long, are copied unchanged.

     NOTE(review): the wrapping is applied to every remaining entry, not
     only to accent commands, so multi letter commands get their last
     letter braced as well - confirm whether that is intended.

     @param source flat array of (Unicode, TeX) pairs.
     @return a new array of the same length as the source.
  */
  private static String[] expand(String[] source) {
    String[] result = new String[source.length];
    for(int i = 1; i < result.length; i += 2) {
      String tex = source[i];
      result[i - 1] = source[i - 1];
      if(tex.endsWith("}") || tex.endsWith("$") || tex.length() <= 1)
        result[i] = tex;
      else
        result[i] = Strings.rightTrunc(tex, 1) + "{" + tex.substring(tex.length() - 1) + "}";
    }
    return result;
  }

  /**
     Translates a TeX string to Unicode.

     @param nstr the TeX string.
     @return the string with the known TeX sequences replaced.
  */
  public String getUnicode(String nstr) {
    return replace(nstr, UNICODE2TEX2);
  }

  /**
     Replaces TeX sequences of the table found in the input string.

     The table is scanned in order and scanning stops at the first pair
     that changes the string; the text before and after that match is
     handled recursively by {@link #replaceOne}.

     @param in    the string to translate.
     @param table flat array of (Unicode, TeX) pairs.
     @return the translated string, or the input if nothing matched.
  */
  private String replace(String in, String[] table) {
    String result = in;
    // Bounded on i + 1 so that a table with a dangling last element
    // cannot be read past its end.
    for(int i = 0; i + 1 < table.length; i += 2) {
      result = replaceOne(result, table[i + 1], table[i], table);
      if(!result.equals(in)) break;
    }
    return result;
  }

  /**
     Replaces the first occurrence of a sequence and translates the text on
     both sides of it.

     Brace pairs that directly enclose the match (ignoring surrounding
     white space) are removed together with that white space.

     @param in    the string to translate.
     @param that  the sequence to look for.
     @param by    the replacement.
     @param table table used for translating the remaining text.
     @return the translated string, or the input if the sequence is empty
     or does not occur.
  */
  private String replaceOne(String in, String that, String by, String[] table) {
    if(that.equals("")) return in;
    int index = in.indexOf(that);
    if(index == -1) return in;

    String before = in.substring(0, index);
    String after  = in.substring(index + that.length());
    for(;;) {
      String trimmedBefore = before.trim();
      String trimmedAfter  = after.trim();
      // Only a matching pair of braces around the match is stripped.
      if(!trimmedBefore.endsWith("{") || !trimmedAfter.startsWith("}")) break;
      before = trimmedBefore.substring(0, trimmedBefore.length() - 1);
      after  = trimmedAfter.substring(1);
    }

    return replace(before, table) + by + replace(after, table);
  }

  /**
     Translates a Unicode string to TeX.

     @param unicode the Unicode string.
     @return the result of UnicodeTrans.trans() on the full table.
  */
  public String getNative(String unicode) {
    return UnicodeTrans.trans(unicode, UNICODE2TEX);
  }

  /**
     Prints both translations of the first command line argument.

     @param argv command line arguments; the first one is translated.
  */
  public static void main(String[] argv) {
    if(argv.length < 1) {
      System.err.println("usage: UTTeX <string>");
      return;
    }
    System.out.println("getNative("  + argv[0] + ") = " + UTTeX.trans.getNative (argv[0]));
    System.out.println("getUnicode(" + argv[0] + ") = " + UTTeX.trans.getUnicode(argv[0]));
  }
}

/*
  $Log: UTTeX.java,v $
  Revision 1.1  2002/07/11 12:00:11  ohitz
  Initial checkin

  Revision 1.6  2001/01/04 16:28:42  schubige
  Header update for 2001 and DIUF

  Revision 1.5  2000/05/04 09:06:01  schubige
  *** empty log message ***

  Revision 1.4  2000/05/02 14:33:33  schubige
  intermediate checkin for iiuf.util.UTTeX sync.

  Revision 1.3  2000/05/01 12:41:42  schubige
  intermediate checkin after UT update

  Revision 1.2  2000/04/27 09:32:38  schubige
  intermediate checkin for sybase proxy lower() based where clause

  Revision 1.1  2000/04/25 12:11:15  schubige
  pre bibtex restart commit

*/