package won.matcher.service.crawler.msg;
import java.io.Serializable;
import java.util.Objects;
/**
* Message class used to crawl URIs relative to base URIs from certain won nodes
*
* User: hfriedrich
* Date: 30.03.2015
*/
public class CrawlUriMessage implements Serializable
{
public static enum STATUS
{
PROCESS, // resource is currently in the crawling process (e.g. downloading, link extraction, saving to rdf store)
FAILED, // crawler failed to process the resource correctly in the last execution
DONE, // resource was successfully crawled and saved and extracted links from that resource are processed too
SAVE, // resource has been saved to the rdf store but links are not extracted. That means crawling of linked
// resources is not happening right now. In contrast to "DONE" the "SAVE" status is set if the resource
// was received by event subscription from the wonnode and not via the crawling process.
}
private String uri;
private String baseUri;
private String wonNodeUri;
private STATUS status;
private long crawlDate;
/**
* Constructor
*
* @param uri URI that should be or is already crawled
* @param baseUri base URI that is used with property paths to extract further URIs
* @param wonNodeUri URI of the corresponding won node
* @param status describes what to with the URI
*/
/**
*
* @param uri URI that should be or is already crawled
* @param baseUri base URI that is used with property paths to extract further URIs
* @param wonNodeUri URI of the corresponding won node
* @param status describes what to with the URI
* @param crawlDate timestamp in milli seconds when crawling message was generated
*/
public CrawlUriMessage(final String uri, final String baseUri, String wonNodeUri, final STATUS status, long crawlDate) {
this.uri = uri;
this.baseUri = baseUri;
this.status = status;
this.wonNodeUri = wonNodeUri;
this.crawlDate = crawlDate;
}
/**
*
* @param uri URI that should be or is already crawled
* @param baseUri base URI that is used with property paths to extract further URIs
* @param status describes what to with the URI
* @param crawlDate timestamp in milli seconds when crawling message was generated
*/
public CrawlUriMessage(final String uri, final String baseUri, final STATUS status, long crawlDate) {
this.uri = uri;
this.baseUri = baseUri;
this.status = status;
this.wonNodeUri = null;
this.crawlDate = crawlDate;
}
public String getUri() {
return uri;
}
public STATUS getStatus() {
return status;
}
public String getBaseUri() {
return baseUri;
}
public String getWonNodeUri() {
return wonNodeUri;
}
public long getCrawlDate() {
return crawlDate;
}
@Override
public String toString() {
return "[" + uri + "," + baseUri + "," + wonNodeUri + "," + status + "," + crawlDate + "]";
}
@Override
public CrawlUriMessage clone() {
return new CrawlUriMessage(uri, baseUri, wonNodeUri, status, crawlDate);
}
@Override
public boolean equals(Object obj) {
if (obj instanceof CrawlUriMessage) {
CrawlUriMessage msg = (CrawlUriMessage) obj;
if (uri.equals(msg.getUri()) && baseUri.equals(msg.getBaseUri()) &&
status.equals(msg.getStatus()) && crawlDate == msg.getCrawlDate()) {
return (wonNodeUri == null) ? msg.getWonNodeUri() == null : wonNodeUri.equals(msg.getWonNodeUri());
}
}
return false;
}
@Override
public int hashCode() {
return (uri + baseUri + wonNodeUri + status.toString() + crawlDate).hashCode();
}
}