/**
This file is part of Waarp Project.
Copyright 2009, Frederic Bregier, and individual contributors by the @author
tags. See the COPYRIGHT.txt in the distribution for a full listing of
individual contributors.
All Waarp Project is free software: you can redistribute it and/or
modify it under the terms of the GNU General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Waarp is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Waarp . If not, see <http://www.gnu.org/licenses/>.
*/
package org.waarp.common.transcode;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Set;
import java.util.SortedMap;
import java.util.Map.Entry;
import org.waarp.common.logging.WaarpLogger;
import org.waarp.common.logging.WaarpLoggerFactory;
/**
* Helper to print in output the Charsets available in the JVM.<br>
* <br>
* -html will output HTML format<br>
* -text (default) will output TEXT format<br>
* -csv will output CSV (comma separated) format<br>
* <br>
* Allow also to transcode one file to another: all arguments mandatory<br>
* -from filename charset<br>
* -to filename charset<br>
*
* @author Frederic Bregier
*
*/
public class CharsetsUtil {
/**
* Internal Logger
*/
private static final WaarpLogger logger = WaarpLoggerFactory
.getLogger(CharsetsUtil.class);
/**
* @param args
*/
public static void main(String[] args) {
int format = 1; // TEXT
boolean transcode = false;
String fromFilename = null;
String fromCharset = null;
String toFilename = null;
String toCharset = null;
if (args.length > 0) {
for (int i = 0; i < args.length; i++) {
if (args[i].equalsIgnoreCase("-html")) {
format = 0;
} else if (args[i].equalsIgnoreCase("-text")) {
format = 1;
} else if (args[i].equalsIgnoreCase("-csv")) {
format = 2;
} else if (args[i].equalsIgnoreCase("-to")) {
i++;
toFilename = args[i];
i++;
toCharset = args[i];
} else if (args[i].equalsIgnoreCase("-from")) {
i++;
fromFilename = args[i];
i++;
fromCharset = args[i];
}
}
transcode = (toCharset != null && toFilename != null && fromCharset != null && fromFilename != null);
}
if (transcode) {
boolean status = transcode(fromFilename, fromCharset, toFilename, toCharset, 16384);
System.out.println("Transcode: " + status);
} else {
printOutCharsetsAvailable(format);
}
}
/**
*
* @param format
* 0 = html, 1 = text, 2 = csv
*/
public static void printOutCharsetsAvailable(int format) {
SortedMap<String, Charset> map = Charset.availableCharsets();
Set<Entry<String, Charset>> set = map.entrySet();
switch (format) {
case 0:
System.out
.println("<html><body><table border=1><tr><th>Name</th><th>CanEncode</th><th>IANA Registered</th><th>Aliases</th></tr>");
break;
case 1:
System.out.println("Name\tCanEncode\tIANA Registered\tAliases");
break;
case 2:
System.out.println("Name,CanEncode,IANA Registered,Aliases");
break;
}
for (Entry<String, Charset> entry : set) {
Charset charset = entry.getValue();
String aliases = null;
switch (format) {
case 0:
aliases = "<ul>";
break;
case 1:
aliases = "[ ";
break;
case 2:
aliases = "[ ";
break;
}
Set<String> aliasCharset = charset.aliases();
for (String string : aliasCharset) {
switch (format) {
case 0:
aliases += "<li>" + string + "</li>";
break;
case 1:
aliases += string + " ";
break;
case 2:
aliases += string + " ";
break;
}
}
switch (format) {
case 0:
aliases += "</ul>";
break;
case 1:
aliases += "]";
break;
case 2:
aliases += "]";
break;
}
switch (format) {
case 0:
System.out.println("<tr><td>" + entry.getKey() +
"</td><td>" + charset.canEncode() +
"</td><td>" + charset.isRegistered() +
"</td><td>" + aliases + "</td>");
break;
case 1:
System.out.println(entry.getKey() +
"\t" + charset.canEncode() +
"\t" + charset.isRegistered() +
"\t" + aliases);
break;
case 2:
System.out.println(entry.getKey() +
"," + charset.canEncode() +
"," + charset.isRegistered() +
"," + aliases);
break;
}
}
switch (format) {
case 0:
System.out.println("</table></body></html>");
break;
case 1:
break;
case 2:
break;
}
}
/**
* Method to transcode one file to another using 2 different charsets
*
* @param srcFilename
* @param fromCharset
* @param toFilename
* @param toCharset
* @param bufferSize
* @return True if OK, else False (will log the reason)
*/
public static boolean transcode(String srcFilename, String fromCharset, String toFilename, String toCharset,
int bufferSize) {
boolean success = false;
File from = new File(srcFilename);
File to = new File(toFilename);
FileInputStream fileInputStream = null;
InputStreamReader reader = null;
FileOutputStream fileOutputStream = null;
OutputStreamWriter writer = null;
try {
fileInputStream = new FileInputStream(from);
reader = new InputStreamReader(fileInputStream, fromCharset);
fileOutputStream = new FileOutputStream(to);
writer = new OutputStreamWriter(fileOutputStream, toCharset);
char[] cbuf = new char[bufferSize];
int read = reader.read(cbuf);
while (read > 0) {
writer.write(cbuf, 0, read);
read = reader.read(cbuf);
}
success = true;
} catch (FileNotFoundException e) {
logger.warn("File not found", e);
} catch (UnsupportedEncodingException e) {
logger.warn("Unsupported Encoding", e);
} catch (IOException e) {
logger.warn("File IOException", e);
}
try {
if (reader != null) {
reader.close();
} else if (fileInputStream != null) {
fileInputStream.close();
}
} catch (IOException e) {
}
try {
if (writer != null) {
writer.flush();
writer.close();
} else if (fileOutputStream != null) {
fileOutputStream.close();
}
} catch (IOException e) {
}
return success;
}
}