package io.github.infolis.util;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.io.FileUtils;
/**
 * Static helper methods for reading and writing text files and for removing
 * duplicate lines / context blocks from them.
 *
 * @author katarina.boland@gesis.org
 * @version 2014-01-27
 *
 */
public class InfolisFileUtils
{
    /** Name of the charset used by all helpers that do not take an explicit encoding. */
    private static final String UTF_8 = "UTF-8";

    /** Platform line separator, looked up once instead of on every write. */
    private static final String LINE_SEPARATOR = System.getProperty("line.separator");

    /**
     * Writes the given content, followed by one platform line separator, to file
     * using the given encoding.
     *
     * @param file the output file
     * @param encoding encoding to use for writing the file
     * @param content content to write to file
     * @param append if set, content will be appended to existing file, else existing file will be overwritten
     * @throws IOException if the file cannot be written
     */
    public static void writeToFile(File file, String encoding, String content, boolean append) throws IOException
    {
        FileUtils.write(file, content + LINE_SEPARATOR, encoding, append);
    }

    /**
     * Reads a file and returns its content as a string.
     *
     * @param file the input file
     * @param encoding encoding of the input file
     * @return a string representing the content of the file
     * @throws IOException if the file cannot be read
     */
    public static String readFile(File file, String encoding) throws IOException
    {
        return FileUtils.readFileToString(file, encoding);
    }

    /**
     * Adds header and start tag to studyRefFinder output XML file. Any existing
     * content of the file is overwritten.
     *
     * @param filename path of the output XML file
     * @throws IOException if the file cannot be written
     */
    public static void prepareOutputFile(String filename) throws IOException
    {
        String header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + LINE_SEPARATOR
                + "<contexts>" + LINE_SEPARATOR;
        writeUtf8(filename, false, header);
    }

    /**
     * Adds end tag to studyRefFinder output XML file by appending to it.
     *
     * @param filename path of the output XML file
     * @throws IOException if the file cannot be written
     */
    public static void completeOutputFile(String filename) throws IOException
    {
        String footer = LINE_SEPARATOR + "</contexts>" + LINE_SEPARATOR;
        writeUtf8(filename, true, footer);
    }

    /**
     * Reads a UTF-8 text file and returns the set of distinct lines it contains.
     * (The misspelled method name is kept for compatibility with existing callers.)
     *
     * @param f the input file, read as UTF-8
     * @return set of the distinct lines of the file, in no particular order
     * @throws IOException if the file cannot be read
     */
    public static Set<String> getDisctinctPatterns(File f) throws IOException
    {
        // Copy straight from the List interface; no assumption about the
        // concrete list type returned by the library is needed.
        return new HashSet<String>(FileUtils.readLines(f, UTF_8));
    }

    /**
     * Appends every distinct line of the input file to the output file, one per
     * line, encoded as UTF-8. Existing content of the output file is kept.
     *
     * @param f_in the input file, read as UTF-8
     * @param f_out the output file the distinct lines are appended to
     * @throws IOException if reading or writing fails
     */
    public static void getDistinct(File f_in, File f_out) throws IOException
    {
        Set<String> distinctLines = getDisctinctPatterns(f_in);
        if (distinctLines.isEmpty())
        {
            // Nothing to append: leave the output file untouched (and uncreated).
            return;
        }
        // One open/close of the output file for all lines instead of one per line.
        // A null line ending makes writeLines use the platform line separator.
        FileUtils.writeLines(f_out, UTF_8, distinctLines, true);
    }

    /**
     * Extracts the context blocks from the input file and writes each distinct
     * block once to the output file (UTF-8, overwriting existing content).
     * <p>
     * A block starts at a line beginning with a tab followed by
     * {@code <context} and ends at the next line beginning with a tab followed by
     * {@code </context>}; lines outside of such blocks are dropped.
     *
     * @param f_in the input file, read as UTF-8
     * @param f_out the output file, overwritten with the distinct context blocks
     * @throws IOException if reading or writing fails
     */
    public static void getDistinctContexts(File f_in, File f_out) throws IOException
    {
        Set<String> contextSet = new HashSet<String>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f_in), UTF_8));
        try
        {
            boolean inContext = false;
            StringBuilder currentContext = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null)
            {
                if (line.startsWith("\t<context"))
                {
                    inContext = true;
                }
                else if (line.startsWith("\t</context>"))
                {
                    // The closing line completes the block; it is stored without
                    // a trailing separator (one is added when writing).
                    inContext = false;
                    currentContext.append(line);
                    contextSet.add(currentContext.toString());
                    currentContext.setLength(0);
                }
                if (inContext)
                {
                    currentContext.append(line).append(LINE_SEPARATOR);
                }
            }
        }
        finally
        {
            reader.close();
        }
        writeLinesUtf8(f_out, contextSet);
    }

    /**
     * Reads file names from the input file (one per line), converts each to its
     * absolute path and writes every distinct absolute path once to the output
     * file (UTF-8, overwriting existing content).
     *
     * @param f_in the input file, read as UTF-8, containing one file name per line
     * @param f_out the output file, overwritten with the distinct absolute paths
     * @throws IOException if reading or writing fails
     */
    public static void getDistinctFilenames( File f_in, File f_out ) throws IOException
    {
        Set<String> distinctAbsolutePaths = new HashSet<String>();
        for (String filename : getDisctinctPatterns(f_in))
        {
            distinctAbsolutePaths.add(new File(filename).getAbsolutePath());
        }
        writeLinesUtf8(f_out, distinctAbsolutePaths);
    }

    /** Writes text to the named file as UTF-8, appending or overwriting, and always closes the file. */
    private static void writeUtf8(String filename, boolean append, String text) throws IOException
    {
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename, append), UTF_8));
        try
        {
            out.write(text);
        }
        finally
        {
            out.close();
        }
    }

    /** Overwrites target with the given entries as UTF-8, each followed by the line separator, and always closes the file. */
    private static void writeLinesUtf8(File target, Set<String> entries) throws IOException
    {
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(target), UTF_8));
        try
        {
            for (String entry : entries)
            {
                out.write(entry);
                out.write(LINE_SEPARATOR);
            }
        }
        finally
        {
            out.close();
        }
    }
}