SolrConnector.java example

Explorer
yacy_search_server-master
/**
 *  SolrConnector
 *  Copyright 2011 by Michael Peter Christen
 *  First released 13.09.2011 at http://yacy.net
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

package net.yacy.cora.federate.solr.connector;

import java.io.IOException;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.concurrent.BlockingQueue;

import net.yacy.cora.sorting.ReversibleScoreMap;

import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;

public interface SolrConnector extends Iterable<String> /* Iterable of document IDs */ {

    public static class LoadTimeURL {
        public long date;
        public String url;
        public LoadTimeURL(final String url, final long date) {
            this.url = url;
            this.date = date;
        }
    }
    
    /**
     * clear all caches: inside solr and ouside solr within the implementations of this interface
     */
    public void clearCaches();
    
    /**
     * get the size of a write buffer (if any) of pending write requests
     */
    public int bufferSize();
    
    /**
     * get the size of the index
     * @return number of results if solr is queries with a catch-all pattern
     */
    public long getSize();
    
    /**
     * force a commit
     */
    public void commit(boolean softCommit);

    /**
     * force an explicit merge of segments
     * @param maxSegments the maximum number of segments. Set to 1 for maximum optimization
     */
    public void optimize(int maxSegments);
    
    /**
     * ask the solr subsystem about it's segment number
     * @return the segment count, which corresponds to the number of files for an index
     */
    public int getSegmentCount();
    
    /**
     * test if the connector is already closed
     * @return true if the connector is closed
     */
    public boolean isClosed();
    
    /**
     * close the server connection
     */
    public void close();

    /**
     * delete everything in the solr index
     * @throws IOException
     */
    public void clear() throws IOException;

    /**
     * delete an entry from solr using the url hash as document id
     * @param id the url hash of the entry
     * @throws IOException
     */
    public void deleteById(final String id) throws IOException;

    /**
     * delete a set of entries from solr; entries are identified by their url hash
     * @param ids a list of url hashes
     * @throws IOException
     */
    public void deleteByIds(final Collection<String> ids) throws IOException;

    /**
     * delete entries from solr according the given solr query string
     * @param querystring
     * @throws IOException
     */
    public void deleteByQuery(final String querystring) throws IOException;

    /**
     * check if a given document, identified by url hash as document id exists
     * @param id the url hash and document id
     * @return the load time metadata (url and load data) if any entry in solr exists, null otherwise
     * @throws IOException
     */
    public LoadTimeURL getLoadTimeURL(final String id) throws IOException;

    /**
     * add a solr input document
     * @param solrdoc
     * @throws IOException
     * @throws SolrException
     */
    public void add(final SolrInputDocument solrdoc) throws IOException, SolrException;
    
    /**
     * Update a solr document.
     * This will write only a partial update for all fields given in the SolrInputDocument
     * and leaves all other fields untouched.
     * @param solrdoc
     * @throws IOException
     * @throws SolrException
     */
    public void update(final SolrInputDocument solrdoc) throws IOException, SolrException;

    /**
     * add a collection of solr input documents
     * @param solrdocs
     * @throws IOException
     * @throws SolrException
     */
    public void add(final Collection<SolrInputDocument> solrdoc) throws IOException, SolrException;

    /**
     * Update a collection of solr input documents.
     * This will write only a partial update for all fields given in the SolrInputDocuments
     * and leaves all other fields untouched.
     * @param solrdocs
     * @throws IOException
     * @throws SolrException
     */
    public void update(final Collection<SolrInputDocument> solrdoc) throws IOException, SolrException;
    
    /**
     * get a document from solr by given key for the id-field
     * @param key
     * @param fields list of fields
     * @return one result or null if no result exists
     * @throws IOException
     */
    public SolrDocument getDocumentById(final String key, final String ... fields) throws IOException;

    /**
     * get a "full" query response from solr. Please compare to getSolrDocumentListByParams which may be much more efficient
     * @param query
     * @throws IOException
     */
    public QueryResponse getResponseByParams(final ModifiableSolrParams query) throws IOException;

    /**
     * get the solr document list from a query response
     * This differs from getResponseByParams in such a way that it does only create the fields of the response but
     * never search snippets and there are also no facets generated.
     * @param params
     * @return
     * @throws IOException
     * @throws SolrException
     */
    public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException;
   
    /**
     * get a query result from solr
     * to get all results set the query String to "*:*"
     * @param querystring the solr query string
     * @param sort the solr sort string, may be null to be not used
     * @param offset the first result offset
     * @param count number of wanted results
     * @param fields list of fields
     * @throws IOException
     */
    public SolrDocumentList getDocumentListByQuery(
            final String querystring,
            final String sort,
            final int offset,
            final int count,
            final String ... fields) throws IOException;
    
    /**
     * get the number of results when this query is done.
     * This should only be called if the actual result is never used, and only the count is interesting
     * @param querystring
     * @return the number of results for this query
     */
    public long getCountByQuery(final String querystring) throws IOException;

    /**
     * get facets of the index: a list of lists with values that are most common in a specific field
     * @param query a query which is performed to get the facets
     * @param maxresults the maximum size of the resulting maps
     * @param fields the field names which are selected as facet
     * @return a map with key = facet field name, value = an ordered map of field values for that field
     * @throws IOException
     */
    public LinkedHashMap<String, ReversibleScoreMap<String>> getFacets(String query, int maxresults, final String ... fields) throws IOException;
    
    /**
     * Get results from a solr query as a stream of documents.
     * The result queue is considered as terminated if AbstractSolrConnector.POISON_DOCUMENT is returned.
     * The method returns immediately and feeds the search results into the queue
     * @param querystring the solr query string
     * @param sort the solr sort string, may be null to be not used
     * @param offset first result offset
     * @param maxcount the maximum number of results
     * @param maxtime the maximum time in milliseconds
     * @param buffersize the size of an ArrayBlockingQueue; if <= 0 then a LinkedBlockingQueue is used
     * @param concurrency is the number of AbstractSolrConnector.POISON_DOCUMENT entries to add at the end of the feed
     * @param prefetchIDs if true, then first all IDs are fetched and then all documents are queries by the ID. If false then documents are retrieved directly
     * @param fields list of fields
     * @return a blocking queue which is terminated with AbstractSolrConnector.POISON_DOCUMENT as last element
     */
    public BlockingQueue<SolrDocument> concurrentDocumentsByQuery(
            final String querystring,
            final String sort,
            final int offset,
            final int maxcount,
            final long maxtime,
            final int buffersize,
            final int concurrency,
            final boolean prefetchIDs,
            final String ... fields);
    
    /**
     * Get results from solr queries as a stream of documents.
     * The result queue is considered as terminated if AbstractSolrConnector.POISON_DOCUMENT is returned.
     * The method returns immediately and feeds the search results into the queue
     * @param querystrings the list of solr query strings
     * @param sort the solr sort string, may be null to be not used
     * @param offset first result offset
     * @param maxcount the maximum number of results
     * @param maxtime the maximum time in milliseconds
     * @param buffersize the size of an ArrayBlockingQueue; if <= 0 then a LinkedBlockingQueue is used
     * @param concurrency is the number of AbstractSolrConnector.POISON_DOCUMENT entries to add at the end of the feed
     * @param prefetchIDs if true, then first all IDs are fetched and then all documents are queries by the ID. If false then documents are retrieved directly
     * @param fields list of fields
     * @return a blocking queue which is terminated with AbstractSolrConnector.POISON_DOCUMENT as last element
     */
    public BlockingQueue<SolrDocument> concurrentDocumentsByQueries(
            final List<String> querystrings,
            final String sort,
            final int offset,
            final int maxcount,
            final long maxtime,
            final int buffersize,
            final int concurrency,
            final boolean prefetchIDs,
            final String ... fields);
            
    /**
     * get a document id result stream from a solr query.
     * The result queue is considered as terminated if AbstractSolrConnector.POISON_ID is returned.
     * The method returns immediately and feeds the search results into the queue
     * @param querystring
     * @param sort the solr sort string, may be null to be not used
     * @param offset
     * @param maxcount
     * @param buffersize the size of an ArrayBlockingQueue; if <= 0 then a LinkedBlockingQueue is used
     * @param concurrency is the number of AbstractSolrConnector.POISON_ID entries to add at the end of the feed
     * @return a list of ids in q blocking queue which is terminated with a number of AbstractSolrConnector.POISON_ID
     */
    public BlockingQueue<String> concurrentIDsByQuery(
            final String querystring,
            final String sort,
            final int offset,
            final int maxcount,
            final long maxtime,
            final int buffersize,
            final int concurrency);

    /**
     * get a document id result stream from a set of solr queries.
     * The result queue is considered as terminated if AbstractSolrConnector.POISON_ID is returned.
     * The method returns immediately and feeds the search results into the queue
     * @param querystring a list of query strings
     * @param sort the solr sort string, may be null to be not used
     * @param offset common offset of all queries
     * @param maxcount maximum count for each query
     * @param buffersize the size of an ArrayBlockingQueue; if <= 0 then a LinkedBlockingQueue is used
     * @param concurrency is the number of AbstractSolrConnector.POISON_ID entries to add at the end of the feed
     * @return a list of ids in q blocking queue which is terminated with a number of AbstractSolrConnector.POISON_ID
     */
    public BlockingQueue<String> concurrentIDsByQueries(
            final List<String> querystrings,
            final String sort,
            final int offset,
            final int maxcount,
            final long maxtime,
            final int buffersize,
            final int concurrency);
}