package eu.dnetlib.iis.wf.referenceextraction;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.specific.SpecificRecord;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import eu.dnetlib.iis.common.java.PortBindings;
import eu.dnetlib.iis.common.java.io.CloseableIterator;
import eu.dnetlib.iis.common.java.io.DataStore;
import eu.dnetlib.iis.common.java.io.FileSystemPath;
import eu.dnetlib.iis.common.java.io.JsonStreamWriter;
import eu.dnetlib.iis.common.java.porttype.AnyPortType;
import eu.dnetlib.iis.common.java.porttype.AvroPortType;
import eu.dnetlib.iis.common.java.porttype.PortType;
/**
* Abstract module building database by executing external process.
*
* @author Dominika Tkaczyk
* @author mhorst
*
*/
public abstract class AbstractDBBuilder<T extends SpecificRecord> implements eu.dnetlib.iis.common.java.Process {
/**
* Avro input records schema.
*/
private final Schema inputSchema;
/**
* Input port name.
*/
private final String inputPort;
/**
* Output port name pointing to produced database.
*/
private final String outputPort;
/**
* Underlying file system facade factory.
*/
private final FileSystemFacadeFactory fsFacadeFactory;
/**
* Class encapsulating execution environment.
*
*/
public class ProcessExecutionContext {
private final Process process;
private final File outputFile;
/**
* @param process process to be executed
* @param targetFile output file to be produced
*/
public ProcessExecutionContext(Process process, File outputFile) {
this.process = process;
this.outputFile = outputFile;
}
public Process getProcess() {
return process;
}
public File getOutputFile() {
return outputFile;
}
}
// -------------------------- CONSTRUCTORS ------------------------------
/**
* @param inputSchema avro input records schema
* @param inputPort input port containing avro records
* @param outputPort output port where database should be created
*/
protected AbstractDBBuilder(Schema inputSchema, String inputPort, String outputPort) {
this((conf) -> {
return new HadoopFileSystemFacade(FileSystem.get(conf));
}, inputSchema, inputPort, outputPort);
}
/**
* @param fsFacadeFactory file system facade factory
* @param inputSchema avro input records schema
* @param inputPort input port containing avro records
* @param outputPort output port where database should be created
*/
protected AbstractDBBuilder(FileSystemFacadeFactory fsFacadeFactory,
Schema inputSchema, String inputPort, String outputPort) {
this.fsFacadeFactory = fsFacadeFactory;
this.inputSchema = inputSchema;
this.inputPort = inputPort;
this.outputPort = outputPort;
}
// -------------------------- LOGIC -------------------------------------
/**
* Initializes process generating data on output port based on avro records provided at input.
*
* @param parameters process execution parameters
* @throws IOException
*/
protected abstract ProcessExecutionContext initializeProcess(Map<String, String> parameters) throws IOException;
/**
* Provides input records interator.
* To be reimplemented by subclasses when needed.
*
*/
protected CloseableIterator<T> getInputRecordsIterator(FileSystemPath fileSystemPath) throws IOException {
return DataStore.getReader(fileSystemPath);
}
/**
* Creates output file by reading avro records from input port.
*
*/
@Override
public void run(PortBindings portBindings, Configuration conf, Map<String, String> parameters)
throws IOException, InterruptedException {
ProcessExecutionContext executionContext = initializeProcess(parameters);
java.lang.Process process = executionContext.getProcess();
FileSystemFacade fileSystemFacade = fsFacadeFactory.create(conf);
try (CloseableIterator<T> inputRecordsIt = getInputRecordsIterator(
new FileSystemPath(fileSystemFacade.getFileSystem(), portBindings.getInput().get(inputPort)))) {
try (JsonStreamWriter<T> writer = new JsonStreamWriter<T>(inputSchema,
new BufferedOutputStream(process.getOutputStream()))) {
while (inputRecordsIt.hasNext()) {
writer.write(inputRecordsIt.next());
}
}
process.waitFor();
} catch (Exception e) {
throw new IOException("got error while writing to Madis stream: " + getErrorMessage(process.getErrorStream()), e);
}
if (process.exitValue() != 0) {
throw new RuntimeException("MadIS execution failed with error: " + getErrorMessage(process.getErrorStream()));
}
try (InputStream inStream = new FileInputStream(executionContext.getOutputFile());
OutputStream outStream = fileSystemFacade.create(
new FileSystemPath(fileSystemFacade.getFileSystem(), portBindings.getOutput().get(outputPort)).getPath())) {
IOUtils.copy(inStream, outStream);
}
}
@Override
public Map<String, PortType> getInputPorts() {
Map<String, PortType> inputPorts = new HashMap<String, PortType>();
inputPorts.put(inputPort, new AvroPortType(inputSchema));
return inputPorts;
}
@Override
public Map<String, PortType> getOutputPorts() {
Map<String, PortType> outputPorts = new HashMap<String, PortType>();
outputPorts.put(outputPort, new AnyPortType());
return outputPorts;
}
// -------------------------- PRIVATE -------------------------------------
/**
* Provides error message from error stream.
*/
private static String getErrorMessage(InputStream errorStream) throws UnsupportedEncodingException, IOException {
StringBuilder errorBuilder = new StringBuilder();
try (BufferedReader stderr = new BufferedReader(new InputStreamReader(errorStream, "utf8"))) {
String line;
while ((line = stderr.readLine()) != null) {
errorBuilder.append(line);
}
}
return errorBuilder.toString();
}
}