package won.matcher.rescal.actor;

import akka.actor.ActorRef;
import akka.actor.UntypedActor;
import akka.cluster.pubsub.DistributedPubSub;
import akka.cluster.pubsub.DistributedPubSubMediator;
import akka.event.Logging;
import akka.event.LoggingAdapter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Component;
import scala.concurrent.duration.FiniteDuration;
import won.matcher.rescal.config.RescalMatcherConfig;
import won.matcher.rescal.service.HintReader;
import won.matcher.utils.tensor.TensorEntryAllGenerator;
import won.matcher.utils.tensor.TensorEntryTokenizer;
import won.matcher.service.common.event.BulkHintEvent;
import won.matcher.service.common.event.HintEvent;
import won.matcher.utils.tensor.TensorEntry;
import won.matcher.utils.tensor.TensorMatchingData;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Collection;

/**
 * Main actor that controls the rescal matching process. It loads the need and connection data from the rdf
 * store, preprocesses the data and saves it to the file system for the actual rescal processing in python.
 * After the rescal algorithm has finished execution, the generated hints are loaded and sent back for saving
 * and further processing.
 * <p>
 * Created by hfriedrich on 02.07.2015.
 */
@Component
@Scope("prototype")
public class RescalMatcherActor extends UntypedActor {

    private static final String TICK = "tick";

    private LoggingAdapter log = Logging.getLogger(getContext().system(), this);
    private long lastQueryDate = Long.MIN_VALUE;
    private TensorMatchingData rescalInputData = new TensorMatchingData();
    private ActorRef pubSubMediator;

    @Autowired
    private HintReader hintReader;

    @Autowired
    private RescalMatcherConfig config;

    @Override
    public void preStart() throws IOException {

        // subscribe to need events
        pubSubMediator = DistributedPubSub.get(getContext().system()).mediator();

        // execute the rescal algorithm regularly
        getContext().system().scheduler().schedule(
            FiniteDuration.Zero(), config.getExecutionDuration(), getSelf(), TICK, getContext().dispatcher(), null);
    }

    @Override
    public void onReceive(final Object o) throws Exception {

        if (o.equals(TICK)) {
            executeRescalAlgorithm();
        } else {
            unhandled(o);
        }
    }

    /**
     * Loads the need and connection data from the sparql endpoint, preprocesses the data and writes it to a
     * directory to be processed by the rescal python algorithm that produces hints. The hints are then loaded
     * and sent to the event bus.
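     * The loaded hints are published on the distributed pub-sub mediator as a single {@link BulkHintEvent}.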
     *
     * @throws IOException
     * @throws InterruptedException
     */
    private void executeRescalAlgorithm() throws IOException, InterruptedException {

        // load the needs and connections from the rdf store
        log.info("start processing (every {}) ...", config.getExecutionDuration());
        long queryDate = System.currentTimeMillis();
        log.info("query needs and connections from rdf store '{}' from date '{}' to date '{}'",
                 config.getSparqlEndpoint(), lastQueryDate, queryDate);

        // add the attributes of the needs to the rescal tensor
        TensorEntryAllGenerator tensorEntryAllGenerator = new TensorEntryAllGenerator(
            "queries/attribute", config.getSparqlEndpoint(), lastQueryDate, queryDate);
        TensorEntryTokenizer tokenizer = new TensorEntryTokenizer(tensorEntryAllGenerator.generateTensorEntries());
        Collection<TensorEntry> tensorEntries = tokenizer.generateTensorEntries();
        for (TensorEntry entry : tensorEntries) {
            rescalInputData.addNeedAttribute(entry);
        }

        // add the connections between the needs to the rescal tensor
        tensorEntryAllGenerator = new TensorEntryAllGenerator(
            "queries/connection", config.getSparqlEndpoint(), lastQueryDate, queryDate);
        tensorEntries = tensorEntryAllGenerator.generateTensorEntries();
        for (TensorEntry entry : tensorEntries) {
            rescalInputData.addNeedConnection(entry.getNeedUri(), entry.getValue(), true);
        }

        log.info("number of needs in tensor: {}", rescalInputData.getNeeds().size());
        log.info("number of attributes in tensor: {}", rescalInputData.getAttributes().size());
        log.info("number of connections in tensor: {}", rescalInputData.getNumberOfConnections());
        log.info("number of slices in tensor: {}", rescalInputData.getSlices().size());

        if (!rescalInputData.isValidTensor()) {
            log.info("not enough tensor data available for execution yet, wait for next execution!");
            return;
        }

        // write the input files for the rescal algorithm
        log.info("write rescal input data to folder: {}", config.getExecutionDirectory());
        TensorMatchingData cleanedTensorData = rescalInputData.writeCleanedOutputFiles(config.getExecutionDirectory());
        int tensorSize = cleanedTensorData.getTensorDimensions()[0];
        if (tensorSize < config.getRescalRank()) {
            log.info("Do not start rescal algorithm since tensor size (number of needs + number of attributes) = {} is " +
                     "smaller than rank parameter {}.", tensorSize, config.getRescalRank());
            return;
        }

        // execute the rescal algorithm in python
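        // the script reads the tensor files written above; it takes the arguments -inputfolder, -outputfolder,
        // -rank and -threshold, all assembled from the matcher configuration below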
        String pythonCall = "python " + config.getPythonScriptDirectory() + "/rescal-matcher.py" +
                            " -inputfolder " + config.getExecutionDirectory() +
                            " -outputfolder " + config.getExecutionDirectory() + "/output" +
                            " -rank " + config.getRescalRank() +
                            " -threshold " + config.getRescalThreshold();
        log.info("execute python script: " + pythonCall);
        Process pythonProcess = Runtime.getRuntime().exec(pythonCall);

        BufferedReader in = new BufferedReader(new InputStreamReader(pythonProcess.getInputStream()));
        String line;
        while ((line = in.readLine()) != null) {
            log.info(line);
        }
        in.close();

        BufferedReader err = new BufferedReader(new InputStreamReader(pythonProcess.getErrorStream()));
        while ((line = err.readLine()) != null) {
            log.warning(line);
        }
        err.close();

        int returnCode = pythonProcess.waitFor();
        if (returnCode != 0) {
            log.error("rescal python call returned error code: " + returnCode);
            return;
        }

        // load the predicted hints and send them to the event bus of the matching service
        BulkHintEvent hintsEvent = hintReader.readHints(rescalInputData);
        int numHints = (hintsEvent == null || hintsEvent.getHintEvents() == null) ?
            0 : hintsEvent.getHintEvents().size();
        log.info("loaded {} hints into bulk hint event and publish", numHints);
        if (numHints > 0) {
            StringBuilder builder = new StringBuilder();
            for (HintEvent hint : hintsEvent.getHintEvents()) {
                builder.append("\n- " + hint);
            }
            log.info(builder.toString());
            pubSubMediator.tell(new DistributedPubSubMediator.Publish(hintsEvent.getClass().getName(), hintsEvent),
                                getSelf());
        }
        lastQueryDate = queryDate;
    }
}