package won.matcher.service.crawler.actor; import akka.actor.OneForOneStrategy; import akka.actor.SupervisorStrategy; import akka.actor.UntypedActor; import akka.event.Logging; import akka.event.LoggingAdapter; import akka.japi.Function; import scala.concurrent.duration.Duration; import won.matcher.service.crawler.config.CrawlConfig; import won.matcher.service.crawler.msg.CrawlUriMessage; import won.matcher.service.crawler.service.CrawlSparqlService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Scope; import org.springframework.stereotype.Component; import java.util.Collection; import java.util.LinkedList; /** * Actor that updates the meta data of the crawling of URIs (baseUri, date, status) in a Sparql endpoint. * This is used to know which URIs have already been crawled and for which URIs the * crawling is still running or failed. * Also the actor collects a certain number of messages before it updates the meta data in * a single query bulk update for all of them. * * User: hfriedrich * Date: 17.04.2015 */ @Component @Scope("prototype") public class UpdateMetadataActor extends UntypedActor { private LoggingAdapter log = Logging.getLogger(getContext().system(), this); private Collection<CrawlUriMessage> bulkMessages = new LinkedList<>(); private static final String TICK = "tick"; @Autowired private CrawlConfig config; @Autowired private CrawlSparqlService endpoint; @Override public void preStart() { // Execute the bulk update at least once a while even if not enough messages are there getContext().system().scheduler().schedule( config.getMetaDataUpdateMaxDuration(), config.getMetaDataUpdateMaxDuration(), getSelf(), TICK, getContext().dispatcher(), null); } @Override public void postStop() { // execute update for the remaining messages before stop update(); } /** * Collects messages until the maximum bulk update size is reached or a timer is * elapsed to execute the meta data bulk update. * * @param message */ @Override public void onReceive(final Object message) { if (message instanceof CrawlUriMessage) { CrawlUriMessage uriMsg = (CrawlUriMessage) message; log.debug("Add message to bulk update list: {}", uriMsg); bulkMessages.add(uriMsg); if (bulkMessages.size() >= config.getMetaDataUpdateMaxBulkSize()) { update(); } } else if (message instanceof String) { update(); } else { unhandled(message); } } /** * update meta data for messages available */ private void update() { if (bulkMessages.size() > 0) { log.debug("Update crawling meta data of {} messages", bulkMessages.size()); endpoint.bulkUpdateCrawlingMetadata(bulkMessages); bulkMessages.clear(); } } @Override public SupervisorStrategy supervisorStrategy() { SupervisorStrategy supervisorStrategy = new OneForOneStrategy( 0, Duration.Zero(), new Function<Throwable, SupervisorStrategy.Directive>() { @Override public SupervisorStrategy.Directive apply(Throwable t) throws Exception { log.warning("Actor encountered error: {}", t); // default behaviour return SupervisorStrategy.escalate(); } }); return supervisorStrategy; } }