package io.github.infolis.infolink.querying;
import io.github.infolis.model.entity.Entity;
import io.github.infolis.model.entity.SearchResult;
import io.github.infolis.util.InformationExtractor;
import io.github.infolis.util.URLParamEncoder;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonObject;
import javax.json.JsonReader;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Query service that searches the da|ra Solr index over HTTP and converts the
 * JSON response into {@link SearchResult} objects.
 *
 * @author kata
 * @author domi
 *
 */
public class DaraSolrQueryService extends QueryService {

    private static final Logger log = LoggerFactory.getLogger(DaraSolrQueryService.class);

    /**
     * Creates the service for the da|ra Solr endpoint.
     * NOTE(review): the meaning of the second super argument (0.8) is defined
     * by {@link QueryService}; confirm there.
     */
    public DaraSolrQueryService() {
        super("http://www.da-ra.de/solr/dara/", 0.8);
    }

    /**
     * Constructs a Solr select url for the given field values. Every non-empty
     * value becomes a <code>field:value</code> clause; clauses are joined with
     * <code>+</code>. Empty (or null) values are left out of the query.
     * The values are inserted verbatim, so callers are responsible for Solr
     * escaping and url encoding.
     *
     * @param title the query title
     * @param pubDate the publication date
     * @param doi the doi
     * @param maxNumber the value of the <code>rows</code> parameter, i.e. the maximum number of documents requested
     * @param resourceType the value for the resourceType field
     * @return a url representing the query
     * @throws MalformedURLException if the assembled string is not a valid url
     */
    public URL constructQueryURL(String title, String pubDate, String doi, int maxNumber, String resourceType) throws MalformedURLException {
        List<String> clauses = new ArrayList<>();
        addClause(clauses, "title", title);
        addClause(clauses, "publicationDate", pubDate);
        addClause(clauses, "doi", doi);
        addClause(clauses, "resourceType", resourceType);

        // Join the clauses explicitly instead of prefixing each with '+' and
        // stripping a leading one afterwards: the former clean-up also removed
        // the '=' of "?q=" whenever the title was empty.
        StringBuilder query = new StringBuilder("?q=");
        for (int i = 0; i < clauses.size(); i++) {
            if (i > 0) {
                query.append('+');
            }
            query.append(clauses.get(i));
        }

        String remainder = "&start=0&rows=" + maxNumber + "&fl=doi,title&wt=json";
        return new URL(target + "select/" + query + remainder);
    }

    /**
     * Adds a <code>field:value</code> clause to the list unless the value is null or empty.
     */
    private static void addClause(List<String> clauses, String field, String value) {
        if (value != null && !value.isEmpty()) {
            clauses.add(field + ":" + value);
        }
    }

    /**
     * Builds the query url for an entity according to the configured query
     * strategy. The fields <code>title</code>, <code>publicationDate</code>,
     * <code>numericInfoInTitle</code> and <code>doi</code> are evaluated; if
     * both <code>title</code> and <code>numericInfoInTitle</code> are set,
     * <code>numericInfoInTitle</code> wins. The query is always restricted to
     * resourceType "2".
     *
     * @param entity the entity to search for
     * @return the url of the Solr query
     * @throws MalformedURLException if the assembled string is not a valid url
     * @throws IllegalArgumentException if the title cannot be url-encoded
     */
    public URL createQuery(Entity entity) throws MalformedURLException {
        String title = "";
        String pubDate = "";
        String doi = "";

        if (this.getQueryStrategy().contains(QueryService.QueryField.title)) {
            title = encodeTitlePhrase(entity.getName(), null);
        }
        if (this.getQueryStrategy().contains(QueryService.QueryField.publicationDate)) {
            String numericInfo = firstNumericInfo(entity);
            if (numericInfo != null) {
                pubDate = ClientUtils.escapeQueryChars(numericInfo);
            }
        }
        if (this.getQueryStrategy().contains(QueryService.QueryField.numericInfoInTitle)) {
            if (!title.isEmpty()) log.debug("Warning: both title and numericInfoInTitle strategies set. Using numericInfoInTitle");
            // falls back to the plain name when the entity has no numeric info
            title = encodeTitlePhrase(entity.getName(), firstNumericInfo(entity));
        }
        if (this.getQueryStrategy().contains(QueryService.QueryField.doi)) {
            // NOTE(review): unlike the title, the doi is neither Solr-escaped nor
            // url-encoded, and an entity without identifiers fails here. Confirm
            // that callers only use the doi strategy with well-formed identifiers.
            doi = entity.getIdentifiers().get(0);
        }
        // resourceType field in da|ra: "2" denotes dataset
        return constructQueryURL(title, pubDate, doi, this.getMaxNumber(), "2");
    }

    /**
     * Returns the first numeric info of the entity, or null if it has none.
     */
    private static String firstNumericInfo(Entity entity) {
        if (entity.getNumericInfo() != null && entity.getNumericInfo().size() > 0) {
            return entity.getNumericInfo().get(0);
        }
        return null;
    }

    /**
     * Escapes Solr query characters in name (and numericInfo, if not null),
     * wraps the result in double quotes to form a phrase and url-encodes it.
     */
    private static String encodeTitlePhrase(String name, String numericInfo) {
        String phrase = ClientUtils.escapeQueryChars(name);
        if (numericInfo != null) {
            phrase += " " + ClientUtils.escapeQueryChars(numericInfo);
        }
        try {
            return URLParamEncoder.encode("\"" + phrase + "\"");
        } catch (UnsupportedEncodingException e) {
            // report the phrase that failed and keep the cause for diagnosis
            throw new IllegalArgumentException("Cannot encode \"" + phrase + "\"", e);
        }
    }

    /**
     * Queries the da|ra Solr index for the given entity and converts every
     * returned document that has a doi into a {@link SearchResult}.
     * Documents without a doi are skipped; the list index of a result is its
     * position in the returned list.
     *
     * @param entity the entity to search for
     * @return the search results; empty if the response cannot be read or
     *         contains no documents
     * @throws RuntimeException if no valid query url can be constructed
     */
    public List<SearchResult> find(Entity entity) {
        //TODO: use solr results and do not parse JSON
        List<SearchResult> results = new ArrayList<>();

        URL url;
        try {
            url = createQuery(entity);
        } catch (MalformedURLException e) {
            // there is no url to report at this point, so describe the entity instead
            throw new RuntimeException("Cannot construct query URL for \"" + entity.getName() + "\"", e);
        }

        JsonArray docs;
        log.debug("Opening stream: {}", url);
        // try-with-resources closes reader and streams even if parsing fails
        try (InputStream is = url.openStream();
                InputStreamReader isr = new InputStreamReader(is, "UTF-8");
                JsonReader reader = Json.createReader(isr)) {
            JsonObject response = reader.readObject().getJsonObject("response");
            docs = (response == null) ? null : response.getJsonArray("docs");
        } catch (IOException e) {
            log.warn("Cannot read response for \"" + url + "\"", e);
            return results;
        }
        if (docs == null) {
            log.warn("Response for \"{}\" does not contain any documents", url);
            return results;
        }

        DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
        int listIndex = 0;
        for (JsonObject item : docs.getValuesAs(JsonObject.class)) {
            JsonArray identifier = item.getJsonArray("doi");
            if (identifier == null || identifier.isEmpty()) {
                // skip only this document; later documents may still have a doi
                log.warn("search result does not have a doi. Ignoring");
                continue;
            }

            SearchResult sr = new SearchResult();
            sr.setQueryService(this.getUri());
            sr.setListIndex(listIndex);
            sr.setTags(getTags());
            sr.setIdentifier(identifier.getString(0));
            sr.setDate(dateFormat.format(new Date()));

            JsonArray titles = item.getJsonArray("title");
            if (titles != null) {
                for (int i = 0; i < titles.size(); i++) {
                    // getString yields the unquoted, unescaped value; cutting the
                    // quotes off toString() would keep JSON escape sequences
                    String title = titles.getString(i);
                    List<String> numericInfo = InformationExtractor.getNumericInfo(title);
                    sr.addTitle(title);
                    for (String num : numericInfo) {
                        sr.addNumericInformation(num);
                    }
                }
            }
            log.debug("Creating search result: titles: {}; identifier: {}", titles, identifier);
            results.add(sr);
            listIndex++;
        }
        return results;
    }
}