package org.juxtasoftware; import java.io.File; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; /** * Utility to generate a lucene search index for the text content * of the juxta web service * * @author loufoster */ public class Indexer { public static void main(String[] args) { if ( args.length == 0 ) { System.err.println("Uasge: Indexer [db_user] [db_pass] (optional) "); System.exit(0); } String user = args[0];; String pass = null; if ( args.length == 2 ) { pass = args[1]; } Connection conn = null; IndexWriter indexWriter = null; try { // Init DB connection System.out.println("Connecting to JuxtaWS database"); Class.forName("com.mysql.jdbc.Driver").newInstance(); conn = DriverManager.getConnection("jdbc:mysql://127.0.0.1/juxta_ws", user, pass); // Init lucene index System.out.println("Initialize Lucene"); File indexDir = new File("lucene-index/"); if (indexDir.exists() == false ) { indexDir.mkdir(); } Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36); Directory directory = FSDirectory.open( indexDir ); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); indexWriter = new IndexWriter(directory, config); indexWriter.deleteAll(); indexWriter.commit(); System.out.println("Indexing SOURCES..."); int srcCnt = indexDocuments("source", indexWriter, conn); System.out.println("\nIndexing WITNESSES..."); int witCnt = indexDocuments("witness", indexWriter, conn); System.out.println("\n============================="); System.out.println(" FINISHED"); System.out.println("============================="); System.out.println(" Sources indexed : "+srcCnt ); System.out.println("Witnesses indexed : "+witCnt ); System.out.println(" TOTAL index size : "+indexWriter.numDocs() ); indexWriter.close(); } catch (Exception e) { e.printStackTrace(); System.exit(0); } finally { // cleanup if ( indexWriter != null ) { try { indexWriter.close(); } catch (Exception e) { e.printStackTrace(); } } if (conn != null) { try { conn.close(); } catch (SQLException e) { e.printStackTrace(); } } } } public static int indexDocuments( final String type, IndexWriter indexWriter, Connection conn ) throws SQLException, CorruptIndexException, IOException { final int startDocs = indexWriter.numDocs(); String txtIdCol = "content_id"; String sql = ""; if ( type == "witness" ) { sql = "select juxta_witness.id, juxta_witness.name, text_id, ws.name from juxta_witness " + " inner join juxta_workspace as ws on ws.id = workspace_id"; txtIdCol = "text_id"; } else { sql = "select juxta_source.id, juxta_source.name, content_id, ws.name from juxta_source " + " inner join juxta_workspace as ws on ws.id = workspace_id"; } Statement stmt = conn.createStatement(); Statement stmt2 = conn.createStatement(); ResultSet rs = stmt.executeQuery(sql); while ( rs.next() ) { final Long textId = rs.getLong(txtIdCol); final Long libraryItemId = rs.getLong("id"); final String name = rs.getString("name"); final String ws = rs.getString("ws.name"); System.out.println(" indexing "+ws+":"+name); String sql2 = "select id, content from text_content where id="+textId; ResultSet rs2 = stmt2.executeQuery(sql2); if (rs2.next()) { Document doc = new Document(); doc.add(new Field("id", rs2.getString("id"), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("workspace", ws, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("type", type, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("itemId", libraryItemId.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("name", name, Field.Store.YES, Field.Index.NOT_ANALYZED)); Field f = new Field("content", rs2.getString("content"), Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add( f ); indexWriter.addDocument(doc); } } stmt2.close(); stmt.close(); indexWriter.commit(); return (indexWriter.numDocs()-startDocs); } }