/**
* SolrConnector
* Copyright 2011 by Michael Peter Christen
* First released 13.09.2011 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import net.yacy.cora.sorting.ReversibleScoreMap;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
/**
 * Connector to a solr index: offers write, delete, lookup and query operations.
 * Iterating over a connector yields document IDs.
 */
public interface SolrConnector extends Iterable<String> {

    /**
     * Holder for the url and the load date of a document,
     * as handed out by {@link #getLoadTimeURL(String)}.
     * As a member of an interface this class is implicitly public and static.
     */
    class LoadTimeURL {

        /** the load date; NOTE(review): presumably milliseconds since epoch - confirm against the implementations */
        public long date;

        /** the url of the document */
        public String url;

        /**
         * @param url the url of the document
         * @param date the load date of the document
         */
        public LoadTimeURL(final String url, final long date) {
            this.url = url;
            this.date = date;
        }
    }

    /**
     * Clear all caches: those inside solr and those held outside of solr
     * by the implementations of this interface.
     */
    void clearCaches();

    /**
     * Get the size of the write buffer (if there is any) of pending write requests.
     * @return the size of the write buffer
     */
    int bufferSize();

    /**
     * Get the size of the index.
     * @return the number of results if solr is queried with a catch-all pattern
     */
    long getSize();

    /**
     * Force a commit.
     * @param softCommit NOTE(review): presumably selects a solr soft commit instead of a hard commit - confirm against the implementations
     */
    void commit(boolean softCommit);

    /**
     * Force an explicit merge of segments.
     * @param maxSegments the maximum number of segments. Set to 1 for maximum optimization
     */
    void optimize(int maxSegments);

    /**
     * Ask the solr subsystem about its segment number.
     * @return the segment count, which corresponds to the number of files for an index
     */
    int getSegmentCount();

    /**
     * Test if the connector is already closed.
     * @return true if the connector is closed
     */
    boolean isClosed();

    /**
     * Close the server connection.
     */
    void close();

    /**
     * Delete everything in the solr index.
     * @throws IOException
     */
    void clear() throws IOException;

    /**
     * Delete an entry from solr; the url hash is used as document id.
     * @param id the url hash of the entry
     * @throws IOException
     */
    void deleteById(String id) throws IOException;

    /**
     * Delete a set of entries from solr; the entries are identified by their url hash.
     * @param ids a list of url hashes
     * @throws IOException
     */
    void deleteByIds(Collection<String> ids) throws IOException;

    /**
     * Delete entries from solr according to the given solr query string.
     * @param querystring the solr query string selecting the entries to delete
     * @throws IOException
     */
    void deleteByQuery(String querystring) throws IOException;

    /**
     * Check if a given document, identified by its url hash as document id, exists.
     * @param id the url hash and document id
     * @return the load time metadata (url and load date) if an entry exists in solr, null otherwise
     * @throws IOException
     */
    LoadTimeURL getLoadTimeURL(String id) throws IOException;

    /**
     * Add a solr input document.
     * @param solrdoc the document to add
     * @throws IOException
     * @throws SolrException
     */
    void add(SolrInputDocument solrdoc) throws IOException, SolrException;

    /**
     * Update a solr document.
     * This writes only a partial update for all fields given in the SolrInputDocument
     * and leaves all other fields untouched.
     * @param solrdoc the document carrying the fields to update
     * @throws IOException
     * @throws SolrException
     */
    void update(SolrInputDocument solrdoc) throws IOException, SolrException;

    /**
     * Add a collection of solr input documents.
     * @param solrdoc the documents to add
     * @throws IOException
     * @throws SolrException
     */
    void add(Collection<SolrInputDocument> solrdoc) throws IOException, SolrException;

    /**
     * Update a collection of solr input documents.
     * This writes only a partial update for all fields given in the SolrInputDocuments
     * and leaves all other fields untouched.
     * @param solrdoc the documents carrying the fields to update
     * @throws IOException
     * @throws SolrException
     */
    void update(Collection<SolrInputDocument> solrdoc) throws IOException, SolrException;

    /**
     * Get a document from solr by a given key for the id-field.
     * @param key the value of the id-field
     * @param fields list of fields
     * @return one result or null if no result exists
     * @throws IOException
     */
    SolrDocument getDocumentById(String key, String... fields) throws IOException;

    /**
     * Get a "full" query response from solr.
     * Please compare to {@link #getDocumentListByParams(ModifiableSolrParams)}, which may be much more efficient.
     * @param query the solr query parameters
     * @return the query response
     * @throws IOException
     */
    QueryResponse getResponseByParams(ModifiableSolrParams query) throws IOException;

    /**
     * Get the solr document list from a query response.
     * This differs from {@link #getResponseByParams(ModifiableSolrParams)} in such a way that only
     * the fields of the response are created; search snippets and facets are never generated.
     * @param params the solr query parameters
     * @return the solr document list for the given parameters
     * @throws IOException
     * @throws SolrException
     */
    SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException;

    /**
     * Get a query result from solr.
     * To get all results set the query string to {@code "*:*"}.
     * @param querystring the solr query string
     * @param sort the solr sort string, may be null to be not used
     * @param offset the first result offset
     * @param count number of wanted results
     * @param fields list of fields
     * @return the solr document list for the query
     * @throws IOException
     */
    SolrDocumentList getDocumentListByQuery(
            String querystring, String sort, int offset, int count, String... fields) throws IOException;

    /**
     * Get the number of results this query would produce.
     * This should only be called if the actual result is never used and only the count is of interest.
     * @param querystring the solr query string
     * @return the number of results for this query
     * @throws IOException
     */
    long getCountByQuery(String querystring) throws IOException;

    /**
     * Get facets of the index: a list of lists with values that are most common in a specific field.
     * @param query a query which is performed to get the facets
     * @param maxresults the maximum size of the resulting maps
     * @param fields the field names which are selected as facet
     * @return a map with key = facet field name, value = an ordered map of field values for that field
     * @throws IOException
     */
    LinkedHashMap<String, ReversibleScoreMap<String>> getFacets(
            String query, int maxresults, String... fields) throws IOException;

    /**
     * Get results from a solr query as a stream of documents.
     * The result queue is considered as terminated if AbstractSolrConnector.POISON_DOCUMENT is returned.
     * The method returns immediately and feeds the search results into the queue.
     * @param querystring the solr query string
     * @param sort the solr sort string, may be null to be not used
     * @param offset first result offset
     * @param maxcount the maximum number of results
     * @param maxtime the maximum time in milliseconds
     * @param buffersize the size of an ArrayBlockingQueue; if {@code <= 0} then a LinkedBlockingQueue is used
     * @param concurrency the number of AbstractSolrConnector.POISON_DOCUMENT entries to add at the end of the feed
     * @param prefetchIDs if true, then first all IDs are fetched and then all documents are queried by the ID.
     *        If false then documents are retrieved directly
     * @param fields list of fields
     * @return a blocking queue which is terminated with AbstractSolrConnector.POISON_DOCUMENT as last element
     */
    BlockingQueue<SolrDocument> concurrentDocumentsByQuery(
            String querystring,
            String sort,
            int offset,
            int maxcount,
            long maxtime,
            int buffersize,
            int concurrency,
            boolean prefetchIDs,
            String... fields);

    /**
     * Get results from several solr queries as a stream of documents.
     * The result queue is considered as terminated if AbstractSolrConnector.POISON_DOCUMENT is returned.
     * The method returns immediately and feeds the search results into the queue.
     * @param querystrings the list of solr query strings
     * @param sort the solr sort string, may be null to be not used
     * @param offset first result offset
     * @param maxcount the maximum number of results
     * @param maxtime the maximum time in milliseconds
     * @param buffersize the size of an ArrayBlockingQueue; if {@code <= 0} then a LinkedBlockingQueue is used
     * @param concurrency the number of AbstractSolrConnector.POISON_DOCUMENT entries to add at the end of the feed
     * @param prefetchIDs if true, then first all IDs are fetched and then all documents are queried by the ID.
     *        If false then documents are retrieved directly
     * @param fields list of fields
     * @return a blocking queue which is terminated with AbstractSolrConnector.POISON_DOCUMENT as last element
     */
    BlockingQueue<SolrDocument> concurrentDocumentsByQueries(
            List<String> querystrings,
            String sort,
            int offset,
            int maxcount,
            long maxtime,
            int buffersize,
            int concurrency,
            boolean prefetchIDs,
            String... fields);

    /**
     * Get a document id result stream from a solr query.
     * The result queue is considered as terminated if AbstractSolrConnector.POISON_ID is returned.
     * The method returns immediately and feeds the search results into the queue.
     * @param querystring the solr query string
     * @param sort the solr sort string, may be null to be not used
     * @param offset first result offset
     * @param maxcount the maximum number of results
     * @param maxtime the maximum time in milliseconds
     * @param buffersize the size of an ArrayBlockingQueue; if {@code <= 0} then a LinkedBlockingQueue is used
     * @param concurrency the number of AbstractSolrConnector.POISON_ID entries to add at the end of the feed
     * @return a list of ids in a blocking queue which is terminated with a number of AbstractSolrConnector.POISON_ID
     */
    BlockingQueue<String> concurrentIDsByQuery(
            String querystring,
            String sort,
            int offset,
            int maxcount,
            long maxtime,
            int buffersize,
            int concurrency);

    /**
     * Get a document id result stream from a set of solr queries.
     * The result queue is considered as terminated if AbstractSolrConnector.POISON_ID is returned.
     * The method returns immediately and feeds the search results into the queue.
     * @param querystrings a list of query strings
     * @param sort the solr sort string, may be null to be not used
     * @param offset common offset of all queries
     * @param maxcount maximum count for each query
     * @param maxtime the maximum time in milliseconds
     * @param buffersize the size of an ArrayBlockingQueue; if {@code <= 0} then a LinkedBlockingQueue is used
     * @param concurrency the number of AbstractSolrConnector.POISON_ID entries to add at the end of the feed
     * @return a list of ids in a blocking queue which is terminated with a number of AbstractSolrConnector.POISON_ID
     */
    BlockingQueue<String> concurrentIDsByQueries(
            List<String> querystrings,
            String sort,
            int offset,
            int maxcount,
            long maxtime,
            int buffersize,
            int concurrency);
}