package io.github.infolis.infolink.querying; import io.github.infolis.model.entity.Entity; import io.github.infolis.model.entity.SearchResult; import io.github.infolis.util.InformationExtractor; import io.github.infolis.util.URLParamEncoder; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonObject; import javax.json.JsonReader; import org.apache.solr.client.solrj.util.ClientUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * * @author kata * */ public class DataciteQueryService extends QueryService { public DataciteQueryService() { super("https://api.datacite.org/works/", 0.6); } private static final Logger log = LoggerFactory.getLogger(DataciteQueryService.class); /** * Constructs a query url for given title, pubDate and doi. * * @param title the query title * @param pubDate the publication date * @param doi the doi * @param maxNumber the maximum number of rows to retrieve * @return a url representing the query * @throws MalformedURLException */ public URL constructQueryURL(String title, String pubDate, String doi, int maxNumber) throws MalformedURLException { String beginning = ""; String remainder = "&start=0&rows=" + maxNumber + "&sort=score&order=desc"; String query = "?query="; if (!title.isEmpty()) query += "title:" + title; if (!doi.isEmpty()) query += "%20AND%20doi:\"" + doi + "\""; //query += "+resource\-type\-general") + ":dataset"; //query += "+resource\-type:dataset"; query += "%20AND%20type:\"dataset\""; query = query.replaceAll("=%20AND%20", ""); return new URL(target + beginning + query + remainder); } @Override public URL createQuery(Entity entity) throws MalformedURLException { String title = ""; String pubDate = ""; String doi = ""; if (this.getQueryStrategy().contains(QueryService.QueryField.title)) { try { title = URLParamEncoder.encode("\"" + ClientUtils.escapeQueryChars(entity.getName()) + "\""); } catch (UnsupportedEncodingException e) { e.printStackTrace(); throw new IllegalArgumentException("Cannot encode \"" + title + "\""); } } if (this.getQueryStrategy().contains(QueryService.QueryField.publicationDate)) { if(entity.getNumericInfo()!= null && entity.getNumericInfo().size()>0) { pubDate = ClientUtils.escapeQueryChars(entity.getNumericInfo().get(0)); } } if (this.getQueryStrategy().contains(QueryService.QueryField.numericInfoInTitle)) { if (!title.isEmpty()) log.debug("Warning: both title and numericInfoInTitle strategies set. Using numericInfoInTitle"); if(entity.getNumericInfo()!= null && entity.getNumericInfo().size()>0) { try { title = URLParamEncoder.encode("\"" + ClientUtils.escapeQueryChars(entity.getName()) + " " + ClientUtils.escapeQueryChars(entity.getNumericInfo().get(0)) + "\""); } catch (UnsupportedEncodingException e) { e.printStackTrace(); throw new IllegalArgumentException("Cannot encode \"" + title + "\""); } } else try { title = URLParamEncoder.encode("\"" + ClientUtils.escapeQueryChars(entity.getName()) + "\""); } catch (UnsupportedEncodingException e) { e.printStackTrace(); throw new IllegalArgumentException("Cannot encode \"" + title + "\""); } /*for (String numInfo : entity.getNumericInfo()) { title += " " + numInfo; }*/ } if (this.getQueryStrategy().contains(QueryService.QueryField.doi)) { doi = entity.getIdentifiers().get(0); } return constructQueryURL(title, pubDate, doi, this.getMaxNumber()); } @Override public List<SearchResult> find(Entity entity) { List<SearchResult> results = new ArrayList<>(); URL url = null; JsonArray result = null; try { url = new URL(createQuery(entity).toString()); log.debug("Opening stream: " + url); InputStream is = url.openStream(); InputStreamReader isr = new InputStreamReader(is, "UTF-8"); JsonReader reader = Json.createReader(isr); JsonObject obj = reader.readObject(); result = obj.getJsonArray("data"); reader.close(); is.close(); isr.close(); } catch (MalformedURLException e) { e.printStackTrace(); throw new RuntimeException("Cannot read response for \"" + url.toString() + "\""); } catch (IOException e) { e.printStackTrace(); } int listIndex = 0; for (JsonObject item : result.getValuesAs(JsonObject.class)) { JsonObject attr = item.getJsonObject("attributes"); SearchResult sr = new SearchResult(); sr.setTags(getTags()); sr.setQueryService(this.getUri()); sr.setListIndex(listIndex); try { String identifier = attr.getString("doi"); sr.setIdentifier(identifier); DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); Date date = new Date(); sr.setDate(dateFormat.format(date)); String title = attr.getString("title"); List<String> numericInfo = InformationExtractor.getNumericInfo(title); sr.addTitle(title); for (String num : numericInfo) sr.addNumericInformation(num); log.debug("Creating search result: title: " + title + "; identifier: " + identifier); results.add(sr); listIndex++; } catch (NullPointerException npe) { log.warn("search result does not have doi and title. Ignoring"); log.debug("item: " + item); } } return results; } }