package ru.exorg.processing;
import org.springframework.beans.factory.InitializingBean;
import ru.exorg.core.model.City;
import ru.exorg.core.model.Description;
import ru.exorg.core.model.Location;
import ru.exorg.core.model.POI;
import ru.exorg.core.service.CafeProvider;
import ru.exorg.core.service.DataProvider;
import ru.exorg.core.service.POIProvider;
import java.lang.Exception;
import java.lang.String;
import java.net.SocketTimeoutException;
import java.util.*;
import org.tartarus.snowball.ext.russianStemmer;
// ================================================================================
/**
 * Post-processing step that runs once Spring has injected all collaborators.
 *
 * <p>For every POI returned by the {@link POIProvider} it strips markup from the
 * descriptions, looks up coordinates for POIs whose location is not valid, asks the
 * {@link DataProvider} to guess a missing type, assigns a default image and syncs the
 * POI back to the provider. When {@code clusterLevel >= 1} the POIs are afterwards
 * grouped into clusters (see {@link #clusterize1()} and {@link #clusterize2()}).
 */
public final class Main implements InitializingBean {
    /** Inclusive Levenshtein bound used by {@link #isLike} for names sharing one address. */
    private static final int SAME_ADDRESS_MAX_DISTANCE = 6;

    /** Exclusive Levenshtein bound used by {@link #clusterize2()} for stem-based matches. */
    private static final int STEM_MATCH_DISTANCE_LIMIT = 6;

    /** Stems of this length or shorter are ignored by {@link #clusterize2()}. */
    private static final int MAX_IGNORED_STEM_LENGTH = 3;

    /** Pause after each geocoding round; presumably throttles the remote services -- TODO confirm. */
    private static final long GEO_QUERY_PAUSE_MS = 500;

    /** Pre-compiled form of the {@code "<.*?>"} expression used to strip tags from descriptions. */
    private static final java.util.regex.Pattern MARKUP_TAG = java.util.regex.Pattern.compile("<.*?>");

    /**
     * Stems that {@link #clusterize2()} never uses as a search key
     * (Russian stems of generic words such as house, bridge, building, yard, theatre,
     * mansion, church, ensemble).
     */
    private static final Set<String> prohibitedStems
            = toSet(new String[]{"дом", "особ", "мост", "здан", "двор", "театр", "особняк", "церков", "ансамбл"});

    private DataProvider dataProvider;
    private POIProvider poiProvider;
    // Assigned in setDataProvider; not read anywhere else in this class.
    private CafeProvider cafeProvider;
    private GeoService geoService;
    private Clustering clustering;
    private List<POI> pois;
    private int clusterLevel = 1;
    // Not referenced anywhere in this class.
    private double distLim = 10000;

    /** Copies the given strings into a new mutable {@link HashSet}. */
    private static Set<String> toSet(final String[] strings) {
        return new HashSet<String>(Arrays.asList(strings));
    }

    public void setGeoService(GeoService gs) {
        this.geoService = gs;
    }

    /** Stores the data provider and caches the POI and cafe providers obtained from it. */
    public void setDataProvider(DataProvider p) {
        this.dataProvider = p;
        this.poiProvider = p.getPOIProvider();
        this.cafeProvider = p.getCafeProvider();
    }

    public void setClusteringService(Clustering c) {
        this.clustering = c;
    }

    /** Sets the cluster level; clustering runs only when the level is at least 1. */
    public void setClusterLevel(int cl) {
        this.clusterLevel = cl;
    }

    /**
     * Looks the POI up through the given geocoding API and copies the first result that
     * lies within the POI's city (address, latitude, longitude, square id) onto the POI.
     * Leaves the POI untouched when the lookup returns {@code null} or no result is
     * within the city.
     *
     * @param poi the POI to enrich
     * @param api one of the {@code GeoService.API_*} constants
     * @throws Exception whatever the geo service or data provider throws
     */
    private void addGeoInfo(POI poi, int api) throws Exception {
        List<Location> candidates = this.geoService.lookupLocation(poi.getLocation(), poi.getName(), api);
        if (candidates == null) {
            return;
        }
        for (Location candidate : candidates) {
            if (!this.dataProvider.isWithinCity(poi.getLocation().getCityId(), candidate)) {
                continue;
            }
            double lat = candidate.getLat();
            double lng = candidate.getLng();
            poi.getLocation().setAddress(candidate.getAddress());
            poi.getLocation().setLat(lat);
            poi.getLocation().setLng(lng);

            City city = this.dataProvider.queryCity(poi.getCityId());
            // NOTE(review): the latitude offset is divided by getLngSubdivLen() and the
            // longitude offset by getLatSubdivLen(), and the row term is not floored before
            // being multiplied by getLatSubdivs(). Kept exactly as in the original because
            // the accessor semantics are not visible here -- confirm against City.
            int squareId = (int) (Math.abs(lat - city.getNeLatLng().getLat()) / city.getLngSubdivLen() * city.getLatSubdivs()
                    + Math.abs(lng - city.getSwLatLng().getLng()) / city.getLatSubdivLen() + 1);
            poi.setSquareId(squareId);
            return; // only the first in-city result is used
        }
    }

    /**
     * Geocodes a POI whose location is not valid: tries the Yandex API first and falls
     * back to the Google API if the location is still not valid. Prints progress to
     * standard output and sleeps {@link #GEO_QUERY_PAUSE_MS} ms after each attempted POI.
     *
     * @throws Exception on lookup failure or if the sleep is interrupted
     */
    private void addGeoInfo(POI poi) throws Exception {
        if (poi.getLocation().isValid()) {
            return;
        }
        if (poi.hasAddress()) {
            System.out.println("Quering for " + poi.getLocation().getAddress() + " (" + poi.getName() + ")");
        } else {
            System.out.println("Quering for " + poi.getName());
        }

        this.addGeoInfo(poi, GeoService.API_YANDEX);
        if (!poi.getLocation().isValid()) {
            this.addGeoInfo(poi, GeoService.API_GOOGLE);
        }
        if (!poi.getLocation().isValid()) {
            System.out.println("Failed");
        }
        Thread.sleep(GEO_QUERY_PAUSE_MS);
    }

    /** Asks the data provider to guess the POI type when the POI has none; otherwise logs a skip. */
    private void guessType(POI poi) throws Exception {
        if (poi.hasType()) {
            System.out.println("Skipping POI " + poi.getName() + " of type " + String.valueOf(poi.getType()));
            return;
        }
        dataProvider.guessPOIType(poi);
    }

    /** Returns whether the Levenshtein distance between the two names is at most 6. */
    private boolean isLike(final String pn1, final String pn2) {
        return Util.getLevenshteinDistance(pn1, pn2) <= SAME_ADDRESS_MAX_DISTANCE;
    }

    /** Levenshtein distance between the two strings. */
    private int edist(final String s1, final String s2) {
        return Util.getLevenshteinDistance(s1, s2);
    }

    /**
     * Address-based clustering: POIs sharing an address are put into one cluster, then
     * members whose name is not similar to any other member of their cluster are removed.
     */
    private void clusterize1() {
        clusterBySharedAddress();
        pruneDissimilarNames();
    }

    /** Creates a new cluster for every address that yields at least two POIs not yet clustered. */
    private void clusterBySharedAddress() {
        for (POI poi : this.pois) {
            if (!poi.hasAddress()) {
                continue;
            }
            List<POI> sameAddress = this.poiProvider.queryByAddress(poi.getAddress());
            // Size is checked before get(0) so an empty result cannot throw.
            if (sameAddress == null || sameAddress.size() < 2) {
                continue;
            }
            if (this.clustering.isInCluster(sameAddress.get(0))) {
                continue;
            }
            long clusterId = this.clustering.getMaxClusterId() + 1;
            for (POI member : sameAddress) {
                this.clustering.setPOICluster(member, clusterId);
            }
        }
    }

    /**
     * Removes from each cluster the members that have no similarly named peer
     * (per {@link #isLike}) among ALL other members of the same cluster.
     *
     * <p>The original compared a member only with the members after it, which always
     * dropped the last member of every cluster. Removals are also deferred until all
     * clusters have been inspected, so the cluster collections are never modified while
     * they are being iterated.
     */
    private void pruneDissimilarNames() {
        List<POI> outliers = new ArrayList<POI>();
        Clustering.Clusters clusters = this.clustering.getClusters();
        for (List<Long> cluster : clusters.values()) {
            // Load each member once instead of re-querying it for every comparison.
            List<POI> members = new ArrayList<POI>(cluster.size());
            for (Long id : cluster) {
                members.add(this.poiProvider.queryById(id));
            }
            for (int i = 0; i < members.size(); i++) {
                if (!hasSimilarPeer(members, i)) {
                    outliers.add(members.get(i));
                }
            }
        }
        for (POI outlier : outliers) {
            this.clustering.removeFromCluster(outlier);
        }
    }

    /** Returns whether any member other than the one at {@code index} has a similar name. */
    private boolean hasSimilarPeer(List<POI> members, int index) {
        String name = members.get(index).getName();
        for (int j = 0; j < members.size(); j++) {
            if (j != index && isLike(name, members.get(j).getName())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Name-based clustering for POIs that are neither clustered nor validly located.
     * Each word of the POI name is lower-cased and stemmed with the Russian Snowball
     * stemmer; stems that are prohibited or at most 3 characters long are skipped. The
     * remaining stems are used to find similarly named POIs with a valid location, and
     * the POI joins the first such match. Processing of a POI stops at its first match.
     */
    private void clusterize2() {
        russianStemmer stemmer = new russianStemmer();
        for (POI poi : this.pois) {
            if (this.clustering.isInCluster(poi) || poi.getLocation().isValid()) {
                continue;
            }
            for (String word : poi.getName().split("\\s+")) {
                // Lower-case BEFORE stemming: the Snowball stemmer works on lower-case text.
                stemmer.setCurrent(word.toLowerCase(Locale.ROOT));
                stemmer.stem();
                String stem = stemmer.getCurrent();
                if (stem.length() <= MAX_IGNORED_STEM_LENGTH || prohibitedStems.contains(stem)) {
                    continue;
                }
                if (joinSimilarLocatedPOI(poi, stem)) {
                    // Already clustered; further words must not move the POI elsewhere.
                    break;
                }
            }
        }
    }

    /**
     * Searches the POIs matching {@code stem} for the first one that has a valid location
     * and whose name is within the stem-match distance of {@code poi}'s name. The POI
     * joins that match's cluster, or a new cluster is created for both of them.
     *
     * @return {@code true} if {@code poi} was assigned to a cluster
     */
    private boolean joinSimilarLocatedPOI(POI poi, String stem) {
        List<POI> likes = this.poiProvider.queryLike(stem);
        if (likes == null) {
            return false;
        }
        for (POI like : likes) {
            if (edist(poi.getName(), like.getName()) < STEM_MATCH_DISTANCE_LIMIT && like.getLocation().isValid()) {
                if (this.clustering.isInCluster(like)) {
                    this.clustering.setPOICluster(poi, this.clustering.getPOICluster(like));
                } else {
                    long clusterId = this.clustering.getMaxClusterId() + 1;
                    this.clustering.setPOICluster(poi, clusterId);
                    this.clustering.setPOICluster(like, clusterId);
                }
                return true;
            }
        }
        return false;
    }

    /**
     * Clears existing clusters, cleans and enriches every POI, syncs it to the provider
     * and, when {@code clusterLevel >= 1}, runs both clustering passes and commits them.
     * A {@link SocketTimeoutException} for a single POI is logged and that POI is skipped
     * (it is not synced); any other exception aborts processing.
     */
    private void processPOI() throws Exception {
        this.clustering.clearClusters();
        for (POI poi : this.pois) {
            try {
                for (Description description : poi.getDescriptions()) {
                    description.setText(MARKUP_TAG.matcher(description.getText()).replaceAll(""));
                }
                this.addGeoInfo(poi);
                this.guessType(poi);
                if (poi.getImage().length() == 0) {
                    poi.addImage("img/default.jpg");
                }
                this.poiProvider.sync(poi);
            } catch (SocketTimeoutException e) {
                System.out.println("Failed to retrieve geographic information for " + poi.getName());
            }
        }
        if (this.clusterLevel >= 1) {
            this.clusterize1();
            this.clusterize2();
            this.clustering.commitClusters();
        }
    }

    /**
     * Loads the POI list and processes it. Failures are reported on standard output and
     * standard error and are not propagated; an interruption additionally restores the
     * thread's interrupt flag.
     */
    public void afterPropertiesSet() {
        try {
            this.pois = this.poiProvider.poiList();
            processPOI();
            //processCafes();
        } catch (InterruptedException e) {
            // Preserve the interrupt status for whoever owns this thread.
            Thread.currentThread().interrupt();
            System.out.println(e.toString());
            e.printStackTrace();
        } catch (Exception e) {
            System.out.println(e.toString());
            e.printStackTrace();
        }
    }
}