/******************************************************************************* * Gisgraphy Project * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA * * Copyright 2008 Gisgraphy project * David Masclet <davidmasclet@gisgraphy.com> * * *******************************************************************************/ package com.gisgraphy.helper; import java.io.UnsupportedEncodingException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.gisgraphy.domain.Constants; /** * Encoding helper * * @author <a href="mailto:david.masclet@gisgraphy.com">David Masclet</a> */ public class EncodingHelper { protected static final Logger logger = LoggerFactory.getLogger(EncodingHelper.class); /** * useful for windows only, this method is a workaround for encoding * problems on Windows.<br> * any suggestion are welcomed * * @return the string in utf-8 */ public static String toUTF8(String string) { String utf8 = ""; try { utf8 = new String(string.getBytes(Constants.CHARSET)); } catch (UnsupportedEncodingException e1) { throw new RuntimeException("can not change String Encoding"); } return utf8; } /** * Set the file.encoding and sun.jnu.encoding to UTF-8 */ public static void setJVMEncodingToUTF8() { setSystemProperty("file.encoding", Constants.CHARSET); setSystemProperty("sun.jnu.encoding", Constants.CHARSET); } private static void setSystemProperty(String name, String value) { if (System.getProperty(name) == null || !System.getProperty(name).equals(value)) { logger.info("change system property from " + System.getProperty(name) + " to " + value); System.setProperty(name, value); logger.info("System property" + name + " is now : " + System.getProperty(name)); } else { logger.info(name + "=" + System.getProperty("file.encoding")); } } /** * To replace accented characters in a String by unaccented equivalents. * source code from lucene ISOLatin1AccentFilter */ public final static String removeAccents(String input) { final StringBuffer output = new StringBuffer(); for (int i = 0; i < input.length(); i++) { switch (input.charAt(i)) { case '\u00C0': // À case '\u00C1': // �? case '\u00C2': //  case '\u00C3': // à case '\u00C4': // Ä case '\u00C5': // Å output.append("A"); break; case '\u00C6': // Æ output.append("AE"); break; case '\u00C7': // Ç output.append("C"); break; case '\u00C8': // È case '\u00C9': // É case '\u00CA': // Ê case '\u00CB': // Ë output.append("E"); break; case '\u00CC': // Ì case '\u00CD': // �? case '\u00CE': // Î case '\u00CF': // �? output.append("I"); break; case '\u00D0': // �? output.append("D"); break; case '\u00D1': // Ñ output.append("N"); break; case '\u00D2': // Ò case '\u00D3': // Ó case '\u00D4': // Ô case '\u00D5': // Õ case '\u00D6': // Ö case '\u00D8': // Ø output.append("O"); break; case '\u0152': // Œ output.append("OE"); break; case '\u00DE': // Þ output.append("TH"); break; case '\u00D9': // Ù case '\u00DA': // Ú case '\u00DB': // Û case '\u00DC': // Ü output.append("U"); break; case '\u00DD': // �? case '\u0178': // Ÿ output.append("Y"); break; case '\u00E0': // à case '\u00E1': // á case '\u00E2': // â case '\u00E3': // ã case '\u00E4': // ä case '\u00E5': // å output.append("a"); break; case '\u00E6': // æ output.append("ae"); break; case '\u00E7': // ç output.append("c"); break; case '\u00E8': // è case '\u00E9': // é case '\u00EA': // ê case '\u00EB': // ë output.append("e"); break; case '\u00EC': // ì case '\u00ED': // í case '\u00EE': // î case '\u00EF': // ï output.append("i"); break; case '\u00F0': // ð output.append("d"); break; case '\u00F1': // ñ output.append("n"); break; case '\u00F2': // ò case '\u00F3': // ó case '\u00F4': // ô case '\u00F5': // õ case '\u00F6': // ö case '\u00F8': // ø output.append("o"); break; case '\u0153': // œ output.append("oe"); break; case '\u00DF': // ß output.append("ss"); break; case '\u00FE': // þ output.append("th"); break; case '\u00F9': // ù case '\u00FA': // ú case '\u00FB': // û case '\u00FC': // ü output.append("u"); break; case '\u00FD': // ý case '\u00FF': // ÿ output.append("y"); break; default: output.append(input.charAt(i)); break; } } return output.toString(); } }