package jane.tool; import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.nio.charset.Charset; import java.util.Map; import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import jane.core.Util; public final class XlsxExport { private static final Pattern _patToken1 = Pattern.compile("\\(([A-Za-z_]\\w*)\\)"); private static final Pattern _patToken2 = Pattern.compile("^([A-Za-z_]\\w*)$"); private static final Pattern _patXmlStr = Pattern.compile("[<>&\"]"); private static final String[] _strXmlStr = new String[64]; static { _strXmlStr['<'] = "<"; _strXmlStr['>'] = ">"; _strXmlStr['&'] = "&"; _strXmlStr['"'] = """; } private XlsxExport() { } private static String getColumnName(int id) // 只支持A(1)~ZZ(26*26+26) { if(id < 26) return new String(new char[] { (char)(id + 'A') }); return new String(new char[] { (char)(id / 26 + 'A' - 1), (char)(id % 26 + 'A') }); } private static String toXmlStr(String s) { Matcher mat = _patXmlStr.matcher(s); if(!mat.find()) return s; StringBuffer sb = new StringBuffer(s.length() + 16); do mat.appendReplacement(sb, _strXmlStr[mat.group().charAt(0)]); while(mat.find()); return mat.appendTail(sb).toString(); } /** * 把xlsx类型的excel文件中的某一页转换成二维Map保存实际字符串的容器 * <p> * 此调用在发现错误时会抛出异常,注意检查 * @param isXlsx 输入xlsx文件的输入流 * @param sheetId 指定输入xlsx文件中的页ID * @return 一定不会返回null */ public static Map<Integer, Map<Integer, String>> xlsx2Maps(InputStream isXlsx, int sheetId) throws Exception { byte[] xmlStr = null, xmlSheet = null; String fileSheet = "xl/worksheets/sheet" + sheetId + ".xml"; try(ZipInputStream zis = new ZipInputStream(new BufferedInputStream(isXlsx))) { for(ZipEntry ze; (ze = zis.getNextEntry()) != null;) { int len = (int)ze.getSize(); if(len < 0) continue; if(ze.getName().equals("xl/sharedStrings.xml")) Util.readStream(zis, ze.getName(), xmlStr = new byte[len], len); else if(ze.getName().equals(fileSheet)) Util.readStream(zis, ze.getName(), xmlSheet = new byte[len], len); } } if(xmlStr == null) throw new IOException("ERROR: not found xl/sharedStrings.xml"); if(xmlSheet == null) throw new IOException("ERROR: not found " + fileSheet); DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder(); NodeList nl = db.parse(new ByteArrayInputStream(xmlStr)).getDocumentElement().getElementsByTagName("si"); String[] strTable = new String[nl.getLength()]; // <si><t>string</t></si> for(int i = 0, n = nl.getLength(); i < n; ++i) { NodeList ts = ((Element)nl.item(i)).getElementsByTagName("t"); int tn = ts.getLength(); if(tn == 1) strTable[i] = ts.item(0).getTextContent().trim(); else { StringBuilder sb = new StringBuilder(); for(int j = 0; j < tn; ++j) sb.append(ts.item(j).getTextContent().trim()); strTable[i] = sb.toString(); } } Map<Integer, Map<Integer, String>> res = new TreeMap<>(); nl = db.parse(new ByteArrayInputStream(xmlSheet)).getDocumentElement().getElementsByTagName("c"); for(int i = 0, n = nl.getLength(); i < n; ++i) { Element elem = (Element)nl.item(i); // <c r="A1" s="1" t="s/b"><v>0</v></c> Node node = elem.getElementsByTagName("v").item(0); if(node != null) { String v = node.getTextContent().trim(); String t = elem.getAttribute("t").trim(); if(t.equals("s")) v = strTable[Integer.parseInt(v)]; else if(t.equals("b")) v = (v.equals("1") ? "TRUE" : "FALSE"); if(v != null && !v.isEmpty()) { t = elem.getAttribute("r"); if(!t.isEmpty()) { char a = t.charAt(0); char b = (t.length() > 1 ? t.charAt(1) : '1'); int x = (b - 'A') & 0xffff, y; if(x >= 26) // A(1)~Z(26) { x = a - 'A' + 1; y = Integer.parseInt(t.substring(1)); } else { // AA(27)~ZZ(26*26+27), 暂不支持更多的列 x += (a - 'A') * 26 + 27; y = Integer.parseInt(t.substring(2)); } Map<Integer, String> map = res.get(y); if(map == null) res.put(y, map = new TreeMap<>()); map.put(x, v); } } } } return res; } public static Map<Integer, Map<Integer, String>> xlsx2Maps(String filename, int sheetId) throws Exception { try(InputStream is = new FileInputStream(filename)) { return xlsx2Maps(is, sheetId); } } /** * 把xlsx类型的excel文件中的某一页转换成xml格式 * <p> * 此调用在发现错误时会抛出异常,注意检查 * @param isXlsx 输入xlsx文件的输入流 * @param sheetId 指定输入xlsx文件中的页ID * @param keyCol 指定的key列号. A,B,C...列分别为1,2,3 * @param outXml 输出xml的输出流 * @param tableName 表名. 一般就是输入的文件名,会记录到xml中便于以后查询之用,可以为null */ public static void xlsx2Xml(InputStream isXlsx, int sheetId, int keyCol, OutputStream outXml, String tableName) throws Exception { Map<Integer, Map<Integer, String>> maps = xlsx2Maps(isXlsx, sheetId); Map<Integer, String> map = maps.get(1); if(map == null) throw new IllegalStateException("ERROR: not found table head"); int nColumn = 0; while(map.get(nColumn + 1) != null) ++nColumn; if(nColumn <= 0) throw new IllegalStateException("ERROR: not found any valid table head field"); String[] strColumn = new String[nColumn]; for(int i = 0; i < nColumn; ++i) { String s = map.get(i + 1); Matcher mat = _patToken1.matcher(s); if(!mat.find()) { mat = _patToken2.matcher(s); if(!mat.find()) strColumn[i] = getColumnName(i); else strColumn[i] = mat.group(1); } else strColumn[i] = mat.group(1); } try(PrintWriter pw = new PrintWriter(new OutputStreamWriter(outXml, "utf-8"))) { pw.print("<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>\n<table xlsx=\""); if(tableName != null) pw.print(toXmlStr(tableName)); pw.print("\" sheetid=\""); pw.print(sheetId); pw.print("\" key=\""); if(keyCol > 0 && keyCol <= strColumn.length) pw.print(strColumn[keyCol - 1]); // pw.print("\" time=\""); // pw.print(new SimpleDateFormat("yy-MM-dd HH:mm:ss\">\n").format(new Date())); pw.print("\">\n"); for(Map.Entry<Integer, Map<Integer, String>> e : maps.entrySet()) { if(e.getKey() <= 1) continue; map = e.getValue(); if(map.get(keyCol) == null) continue; int n = 0; for(Map.Entry<Integer, String> e2 : map.entrySet()) { int i = e2.getKey(); if(i <= nColumn) { String val = e2.getValue(); if(n == 0) pw.print("<record"); pw.print(' '); pw.print(strColumn[i - 1]); pw.print('='); pw.print('"'); pw.print(toXmlStr(val)); pw.print('"'); ++n; } } if(n > 0) pw.print("/>\n"); } pw.print("</table>\n"); } } /** * 把xlsx类型的excel文件中的某一页转换成txt格式 * <p> * 此调用在发现错误时会抛出异常,注意检查 * @param isXlsx 输入xlsx文件的输入流 * @param sheetId 指定输入xlsx文件中的页ID * @param outTxt 输出txt的输出流 */ public static void xlsx2Txt(InputStream isXlsx, int sheetId, OutputStream outTxt) throws Exception { Charset cs = Charset.forName("utf-8"); for(Map.Entry<Integer, Map<Integer, String>> e : xlsx2Maps(isXlsx, sheetId).entrySet()) { int y = e.getKey(); for(Map.Entry<Integer, String> e2 : e.getValue().entrySet()) { outTxt.write(String.valueOf(y).getBytes(cs)); outTxt.write(' '); outTxt.write(e2.getKey().toString().getBytes(cs)); outTxt.write(' '); byte[] bytes = e2.getValue().getBytes(cs); outTxt.write(String.valueOf(bytes.length).getBytes(cs)); outTxt.write(' '); outTxt.write(bytes); outTxt.write('\n'); } } } /** * 对xlsx文件的要求:<br> * <li>第一行必须是各字段的名称,且每个名称后面必须有括号包含的程序字段名,以字母数字或下划线(不能以数字开头)构成 * <li>第一列必须是表的主键,全表唯一 * <li>没有字段名的列会被忽略 * <li>第一列为空的整行都会被忽略 * <li>如果非第一列为空,则没有此字段的输出,默认表示数值0或空字符串 */ public static void main(String[] args) throws Exception { if(args.length < 3) { System.err.println("USAGE: java jane.tool.XlsxTool xml <filenameInput.xlsx> <filenameOutput.xml> [sheetId=1] [keyCol=1]"); System.err.println(" java jane.tool.XlsxTool txt <filenameInput.xlsx> <filenameOutput.txt> [sheetId=1]"); return; } int sheetId = (args.length > 3 ? Integer.parseInt(args[3].trim()) : 1); int keyCol = (args.length > 4 ? Integer.parseInt(args[4].trim()) : 1); System.err.print("INFO: convert " + args[1] + " <" + sheetId + "> => " + args[2] + " ... "); @SuppressWarnings("resource") InputStream is = (args[1].equals("-") ? System.in : new FileInputStream(args[1].trim())); try { @SuppressWarnings("resource") OutputStream os = (args[2].equals("-") ? System.out : new FileOutputStream(args[2].trim())); try { if(args[0].equals("xml")) xlsx2Xml(is, sheetId, keyCol, os, args[0]); else xlsx2Txt(is, sheetId, os); } finally { if(os != System.out) os.close(); } } finally { if(is != System.in) is.close(); } System.err.println("OK!"); // usage sample: // Map<Integer, TestType> beanmap = new HashMap<Integer, TestType>(); // Util.xml2BeanMap(args[1], beanmap, Integer.class, TestType.class, null); // for(Entry<Integer, TestType> e : beanmap.entrySet()) // { // System.out.print(e.getKey()); // System.out.print(": "); // System.out.println(e.getValue().toJson()); // } } }