package com.virjar.dungproxy.server.crawler;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
import com.virjar.dungproxy.server.entity.Proxy;
/**
 * Base class for proxy collectors that throttles how often the actual collection runs.
 *
 * <p>Subclasses implement {@link #doCollect()}; callers invoke {@link #newProxy()}, which only
 * delegates to {@code doCollect()} when at least {@code duration} minutes have elapsed since the
 * previous attempt. The class also keeps a running count of collected proxies that is reset
 * once more than 24 hours have passed since the last reset.
 *
 * <p>NOTE(review): the mutable state here is not synchronized; confirm callers invoke a given
 * collector from a single thread.
 *
 * Created by virjar on 16/11/26.
 */
public abstract class NewCollector {
    private static final Logger logger = LoggerFactory.getLogger(NewCollector.class);

    /** Milliseconds per minute, kept as {@code long} so interval arithmetic cannot overflow. */
    private static final long MILLIS_PER_MINUTE = 60L * 1000L;

    /** Interval, in milliseconds, after which {@link #collectedNumber} is reset (24 hours). */
    private static final long CLEAN_DURATION_MILLIS = 24L * 60L * MILLIS_PER_MINUTE;

    /**
     * Expected number of proxies to gather per collection run.
     */
    protected int batchSize = 100;

    /**
     * Minimum interval between two collection runs, in minutes.
     */
    private int duration = 120;

    /**
     * Number of proxies collected since the counter was last reset.
     */
    private int collectedNumber;

    /** Timestamp (ms) of the last collection attempt; 0 means no attempt has been made yet. */
    private long lastActiveTimeStamp = 0;

    /** Timestamp (ms) at which {@link #collectedNumber} was last reset. */
    private long lastCleanTimeStamp = System.currentTimeMillis();

    /** String form of the most recent exception thrown by {@link #doCollect()}, if any. */
    protected String errorInfo;

    /**
     * Returns a URL associated with this collector.
     *
     * <p>NOTE(review): the name suggests "last URL" visited by the collector; the exact meaning
     * is defined by subclasses — confirm against implementations.
     *
     * @return the URL string supplied by the subclass
     */
    public abstract String lasUrl();

    /**
     * Performs the actual collection. Invoked by {@link #newProxy()} when the throttle allows.
     *
     * @return the proxies collected in this run; a {@code null} return is treated as empty
     */
    public abstract List<Proxy> doCollect();

    /**
     * Runs a throttled collection.
     *
     * <p>If fewer than {@code duration} minutes have elapsed since the previous attempt, an empty
     * list is returned without calling {@link #doCollect()}. Otherwise {@code doCollect()} is
     * invoked; any exception it throws is logged, recorded in {@link #errorInfo}, and converted
     * into an empty result. The attempt timestamp is updated whether or not collection succeeded.
     *
     * @return the newly collected proxies, or an empty list when throttled or on failure
     */
    public List<Proxy> newProxy() {
        long timeStamp = System.currentTimeMillis();

        // Reset the running counter once the clean interval has elapsed.
        if (timeStamp - lastCleanTimeStamp > CLEAN_DURATION_MILLIS) {
            this.collectedNumber = 0;
            lastCleanTimeStamp = timeStamp;
        }

        // Multiply in long: duration * 60 * 1000 in int overflows for durations above ~35791 minutes.
        long minIntervalMillis = duration * MILLIS_PER_MINUTE;
        if (timeStamp - lastActiveTimeStamp < minIntervalMillis) {
            return Lists.newArrayList();
        }

        List<Proxy> ret;
        try {
            ret = doCollect();
        } catch (Exception e) {
            ret = Lists.newArrayList();
            logger.error("收集器异常:", e);
            errorInfo = e.toString();
        }

        // Guard against subclasses returning null so the bookkeeping below cannot throw.
        if (ret == null) {
            ret = Lists.newArrayList();
        }

        collectedNumber += ret.size();
        lastActiveTimeStamp = timeStamp;
        return ret;
    }

    /**
     * @return the number of proxies collected since the counter was last reset
     */
    public int getCollectedNumber() {
        return collectedNumber;
    }

    /**
     * @return the minimum interval between collection runs, in minutes
     */
    public int getDuration() {
        return duration;
    }

    /**
     * @param duration the minimum interval between collection runs, in minutes
     */
    public void setDuration(int duration) {
        this.duration = duration;
    }

    /**
     * @return the expected number of proxies per collection run
     */
    public int getBatchSize() {
        return batchSize;
    }

    /**
     * @param batchSize the expected number of proxies per collection run
     */
    public void setBatchSize(int batchSize) {
        this.batchSize = batchSize;
    }

    /**
     * @return the recorded error description, or {@code null} if none has been recorded
     */
    public String getErrorInfo() {
        return errorInfo;
    }

    /**
     * @param errorInfo the error description to record
     */
    public void setErrorInfo(String errorInfo) {
        this.errorInfo = errorInfo;
    }
}