package refdata;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
/**
* A small utility class for sorting the reference data by UUID. With this it is
* easier to see updates in the diff-files.
*/
public class RefDataSort {
public static void main(String[] args) {
try {
sortFiles(Paths.get("data", "all"));
sortFiles(Paths.get("data", "units"));
} catch (Exception e) {
e.printStackTrace();
}
}
private static void sortFiles(Path dir) throws Exception {
Files.list(dir).forEach(file -> {
if (!file.toString().endsWith(".csv"))
return;
try {
sortFile(file);
} catch (Exception e) {
throw new RuntimeException(e);
}
});
}
private static void sortFile(Path file) throws Exception {
System.out.println(" Sort:" + file);
Charset utf8 = Charset.forName("utf-8");
List<String> raws = Files.readAllLines(file, utf8);
List<Line> lines = new ArrayList<>();
for (String raw : raws) {
String s = stripByteOrderMark(raw, utf8);
lines.add(new Line(s));
}
lines.sort((line1, line2) -> line1.uuid.compareTo(line2.uuid));
List<String> sorted = new ArrayList<>();
for (Line line : lines) {
sorted.add(line.rawLine);
}
Files.write(file, sorted, utf8, StandardOpenOption.CREATE,
StandardOpenOption.TRUNCATE_EXISTING);
}
/**
* The first line of a file may starts with a byte-order-mark
* (https://en.wikipedia.org/wiki/Byte_order_mark). When we sort the lines
* this line may is not the first line anymore which will lead to errors in
* the CSV import. Thus, we remove a byte-order-mark if the line starts with
* this.
*/
private static String stripByteOrderMark(String raw, Charset utf8) {
byte[] bytes = raw.getBytes(utf8);
if (bytes.length < 3)
return raw;
if ((bytes[0] == (byte) 0xEF) && (bytes[1] == (byte) 0xBB) && (bytes[2] == (byte) 0xBF))
return new String(bytes, 3, bytes.length - 3, utf8);
else
return raw;
}
private static class Line {
String uuid;
String rawLine;
Line(String rawLine) {
this.rawLine = rawLine;
uuid = rawLine.split(";")[0];
if (uuid.startsWith("\"") && uuid.endsWith("\"")) {
uuid = uuid.substring(1, uuid.length() - 1);
}
int length = uuid.length();
if (length != 36) {
System.out.println(" Invalid UUID: " + uuid);
}
}
}
}