package er.luceneadaptor; import java.io.IOException; import java.math.BigInteger; import java.text.Format; import java.text.ParseException; import java.util.Date; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.DateTools.Resolution; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.TermVector; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.Version; import com.ibm.icu.math.BigDecimal; import com.webobjects.eoaccess.EOAdaptorChannel; import com.webobjects.eoaccess.EOAttribute; import com.webobjects.eoaccess.EOEntity; import com.webobjects.eoaccess.EOGeneralAdaptorException; import com.webobjects.eoaccess.EOModel; import com.webobjects.eoaccess.EOSQLExpression; import com.webobjects.eoaccess.EOStoredProcedure; import com.webobjects.eocontrol.EOAndQualifier; import com.webobjects.eocontrol.EOClassDescription; import com.webobjects.eocontrol.EOFetchSpecification; import com.webobjects.eocontrol.EOKeyComparisonQualifier; import com.webobjects.eocontrol.EOKeyValueQualifier; import com.webobjects.eocontrol.EONotQualifier; import com.webobjects.eocontrol.EOOrQualifier; import com.webobjects.eocontrol.EOQualifier; import com.webobjects.eocontrol.EOQualifierEvaluation; import com.webobjects.eocontrol.EOSortOrdering; import com.webobjects.foundation.NSArray; import com.webobjects.foundation.NSData; import com.webobjects.foundation.NSDictionary; import com.webobjects.foundation.NSForwardException; import com.webobjects.foundation.NSKeyValueCoding; import com.webobjects.foundation.NSMutableArray; import com.webobjects.foundation.NSMutableDictionary; import com.webobjects.foundation.NSNumberFormatter; import com.webobjects.foundation.NSPropertyListSerialization; import com.webobjects.foundation.NSRange; import com.webobjects.foundation.NSSelector; import com.webobjects.foundation.NSTimestamp; import com.webobjects.foundation.NSTimestampFormatter; import com.webobjects.foundation._NSUtilities; import er.extensions.eof.qualifiers.ERXBetweenQualifier; import er.extensions.foundation.ERXKeyValueCodingUtilities; import er.extensions.foundation.ERXPatcher; import er.extensions.qualifiers.ERXQualifierTraversal; /** * ERLuceneAdaptorChannel takes care of the actual writing and reading. * * @author ak */ public class ERLuceneAdaptorChannel extends EOAdaptorChannel { private static final String EXTERNAL_NAME_KEY = "_e"; private static class LuceneQualifierTraversal extends ERXQualifierTraversal { NSMutableArray _queries; EOEntity _entity; public LuceneQualifierTraversal(EOEntity entity) { _entity = entity; } protected NSArray<Query> queriesForCurrent(int count) { NSRange range = new NSRange(_queries.count() - count, count); NSArray<Query> result = _queries.subarrayWithRange(range); _queries.removeObjectsInRange(range); return result; } @Override protected boolean traverseAndQualifier(EOAndQualifier q) { NSArray<Query> queries = queriesForCurrent(q.qualifiers().count()); BooleanQuery query = new BooleanQuery(); for (Query current : queries) { query.add(current, BooleanClause.Occur.MUST); } _queries.addObject(query); return true; } @Override protected boolean traverseNotQualifier(EONotQualifier q) { NSArray<Query> queries = queriesForCurrent(1); BooleanQuery query = new BooleanQuery(); query.add(queries.lastObject(), BooleanClause.Occur.MUST_NOT); _queries.addObject(query); return true; } @Override protected boolean traverseOrQualifier(EOOrQualifier q) { NSArray<Query> queries = queriesForCurrent(q.qualifiers().count()); DisjunctionMaxQuery query = new DisjunctionMaxQuery(queries, 0); _queries.addObject(query); return true; } @Override protected boolean traverseUnknownQualifier(EOQualifierEvaluation q) { throw new IllegalArgumentException("Unknown qualifier: " + q); } @Override protected boolean traverseKeyValueQualifier(EOKeyValueQualifier q) { Query query = null; String key = _entity.attributeNamed(q.key()).columnName(); IndexAttribute attr = new IndexAttribute(_entity.attributeNamed(key)); if (q instanceof ERXBetweenQualifier) { ERXBetweenQualifier between = (ERXBetweenQualifier) q; Object min = between.minimumValue(); Object max = between.maximumValue(); query = new TermRangeQuery(key, attr.asLuceneValue(min), attr.asLuceneValue(max), false, false); } else if(q.selector().equals(EOQualifier.QualifierOperatorGreaterThan)) { query = new TermRangeQuery(key, attr.asLuceneValue(q.value()), null, false, false); } else if(q.selector().equals(EOQualifier.QualifierOperatorGreaterThanOrEqualTo)) { query = new TermRangeQuery(key, attr.asLuceneValue(q.value()), null, true, false); } else if(q.selector().equals(EOQualifier.QualifierOperatorLessThan)) { query = new TermRangeQuery(key, null, attr.asLuceneValue(q.value()), false, false); } else if(q.selector().equals(EOQualifier.QualifierOperatorLessThanOrEqualTo)) { query = new TermRangeQuery(key, null, attr.asLuceneValue(q.value()), false, true); } else if(q.selector().equals(EOQualifier.QualifierOperatorCaseInsensitiveLike) || q.selector().equals(EOQualifier.QualifierOperatorLike)) { String value = q.value().toString(); if(q.selector().equals(EOQualifier.QualifierOperatorLike)) { value = value.toLowerCase(); } int star = value.indexOf('*'); if(star >= 0) { if(star < value.length() - 1) { query = new WildcardQuery(new Term(key, value)); } else { query = new PrefixQuery(new Term(key, value.substring(0, star))); } } else if(value.contains(" ")) { MultiPhraseQuery multi = new MultiPhraseQuery(); query = multi; String parts[] = value.split(" +"); for (int i = 0; i < parts.length; i++) { String part = parts[i]; multi.add(new Term(key, part)); } } else { query = new TermQuery(new Term(key, value)); } } else { query = new TermQuery(new Term(key, attr.asLuceneValue(q.value()))); } _queries.addObject(query); return true; } @Override protected boolean traverseKeyComparisonQualifier(EOKeyComparisonQualifier q) { throw new IllegalArgumentException("Unknown qualifier: " + q); } @Override public void traverse(EOQualifierEvaluation q, boolean postOrder) { _queries = new NSMutableArray<Query>(); super.traverse(q, true); } public Query query() { BooleanQuery q = new BooleanQuery(); q.add(new TermQuery(new Term(EXTERNAL_NAME_KEY, _entity.externalName())), BooleanClause.Occur.MUST); q.add((Query) _queries.lastObject(), BooleanClause.Occur.MUST); return q; } } /** * Morphs EO values to lucene values. * * @author ak * */ protected static class IndexAttribute { private static String[] NAMES = new String[] { "Arabic", "Brazilian", "CJK", "Chinese", "Czech", "German", "Greek", "Persian", "French", "Dutch", "Russian", "Thai" }; private static String[] CODES = new String[] { "ar", "br", "cjk", "cn", "cz", "de", "el", "fa", "fr", "nl", "ru", "th" }; private static NSDictionary<String, String> LOCALES = new NSDictionary<String, String>(NAMES, CODES); private String _columnName; private TermVector _termVector; private Store _store; private Index _index; private Analyzer _analyzer; private Format _format; private EOAttribute _attribute; @SuppressWarnings("deprecation") public IndexAttribute(EOAttribute attribute) { _attribute = attribute; NSDictionary dict = attribute.userInfo() != null ? attribute.userInfo() : NSDictionary.emptyDictionary(); _columnName = attribute.columnName(); boolean isClassProperty = _attribute.entity().classPropertyNames().contains(_attribute.name()); boolean isDataProperty = _attribute.className().endsWith("NSData"); boolean isStringProperty = _attribute.className().endsWith("String"); _termVector = (TermVector) classValue(dict, "termVector", TermVector.class, isClassProperty && !isDataProperty && isStringProperty ? "YES" : "NO"); _store = (Store) classValue(dict, "store", Store.class, "YES"); _index = (Index) classValue(dict, "index", Index.class, isClassProperty && !isDataProperty && isStringProperty ? "ANALYZED" : "NOT_ANALYZED"); String analyzerClass = (String) dict.objectForKey("analyzer"); if (analyzerClass == null && _columnName.matches("\\w+_(\\w+)")) { String locale = _columnName.substring(_columnName.lastIndexOf('_') + 1).toLowerCase(); analyzerClass = LOCALES.objectForKey(locale); if (analyzerClass != null) { analyzerClass = ERXPatcher.classForName("org.apache.lucene.analysis." + locale + "." + analyzerClass).getName(); } } if (analyzerClass == null) { analyzerClass = StandardAnalyzer.class.getName(); } Class c = ERXPatcher.classForName(analyzerClass); _analyzer = (Analyzer) _NSUtilities.instantiateObject(c, new Class[] { Version.class }, new Object[] { Version.LUCENE_20 }, true, false); _format = (Format) create((String) dict.objectForKey("format")); String numberFormat = (String) dict.objectForKey("numberformat"); if (numberFormat != null) { _format = new NSNumberFormatter(numberFormat); } String dateformat = (String) dict.objectForKey("dateformat"); if (dateformat != null) { _format = new NSTimestampFormatter(dateformat); } } private Object create(String className) { if (className != null) { try { Class c = ERXPatcher.classForName(className); return c.newInstance(); } catch (InstantiationException e) { throw NSForwardException._runtimeExceptionForThrowable(e); } catch (IllegalAccessException e) { throw NSForwardException._runtimeExceptionForThrowable(e); } } return null; } private Object classValue(NSDictionary dict, String key, Class c, String defaultValue) { Object result; String code = (String) dict.objectForKey(key); if (code == null) { code = defaultValue; } result = ERXKeyValueCodingUtilities.classValueForKey(c, code); return result; } public TermVector termVector() { return _termVector; } public Index index() { return _index; } public Store store() { return _store; } public String columnName() { return _columnName; } public Analyzer analyzer() { return _analyzer; } public String asLuceneValue(Object value) { if (_format != null) { return _format.format(value); } if(value == null) { return null; } if (attribute().valueType() != null) { char valueType = attribute().valueType().charAt(0); switch (valueType) { case 'i': return NumericUtils.intToPrefixCoded(((Number) value).intValue()); case 'b': return NumericUtils.longToPrefixCoded(((Number) value).longValue()); case 'l': return NumericUtils.longToPrefixCoded(((Number) value).longValue()); case 'd': return NumericUtils.doubleToPrefixCoded(((Number) value).doubleValue()); case 'B': return NumericUtils.doubleToPrefixCoded(((Number) value).doubleValue()); } } if (value instanceof Date) { return DateTools.dateToString((Date) value, Resolution.MILLISECOND); } else if (value instanceof NSData) { return NSPropertyListSerialization.stringFromPropertyList(value); } else if (value instanceof NSArray) { return ((NSArray) value).componentsJoinedByString(" "); } return value.toString(); } public Object asEOFValue(String value) { try { if (_format != null) { return _format.parseObject(value); } if(value == null) { return null; } if (attribute().valueType() != null) { char valueType = attribute().valueType().charAt(0); switch (valueType) { case 'i': return Integer.valueOf(NumericUtils.prefixCodedToInt(value)); case 'b': return BigInteger.valueOf(NumericUtils.prefixCodedToLong(value)); case 'l': return Long.valueOf(NumericUtils.prefixCodedToLong(value)); case 'd': return Double.valueOf(NumericUtils.prefixCodedToDouble(value)); case 'B': return BigDecimal.valueOf(NumericUtils.prefixCodedToDouble(value)); } } if (attribute().className().contains("NSTimestamp")) { return new NSTimestamp(DateTools.stringToDate(value)); } else if (attribute().className().contains("NSData")) { return new NSData((NSData) NSPropertyListSerialization.propertyListFromString(value)); } else if (attribute().className().contains("NSArray")) { return NSArray.componentsSeparatedByString(value, " "); } return value.toString(); } catch (ParseException ex) { throw NSForwardException._runtimeExceptionForThrowable(ex); } } public Field valueToField(Document doc, Object value) { String stringValue = asLuceneValue(value); Field field = doc.getField(columnName()); if (value != null) { if (field == null) { field = new Field(columnName(), stringValue, store(), index(), termVector()); } field.setValue(stringValue); } else { field = null; } if (field != null) { field.setValue(stringValue); } return field; } public EOAttribute attribute() { return _attribute; } } private NSArray<EOAttribute> _attributes; private NSArray<IndexAttribute> _indexAttributes; private EOEntity _entity; private int _fetchIndex; private boolean _open; private IndexSearcher _searcher; private boolean _fetchInProgress = false; private TopDocs _fetchedDocs; public ERLuceneAdaptorChannel(ERLuceneAdaptorContext context) { super(context); _fetchIndex = -1; } private NSArray<IndexAttribute> attributesForEntity(EOEntity entity) { return attributesForAttributes(entity.attributesToFetch()); } private NSArray<IndexAttribute> attributesForAttributes(NSArray<EOAttribute> attributes) { NSMutableArray result = new NSMutableArray<IndexAttribute>(attributes.count()); for (EOAttribute attribute : attributes) { result.addObject(new IndexAttribute(attribute)); } return result; } public IndexWriter writer() { return adaptorContext().writer(); } public IndexSearcher searcher() throws CorruptIndexException, IOException { if (_searcher == null/* * || !adaptorContext().adaptor().indexReader().isCurrent () */) { _searcher = new IndexSearcher(adaptorContext().adaptor().indexReader()); } return _searcher; } @Override public ERLuceneAdaptorContext adaptorContext() { return (ERLuceneAdaptorContext) super.adaptorContext(); } @Override public NSArray<EOAttribute> attributesToFetch() { return _attributes; } @Override public void cancelFetch() { reset(); } private void reset() { _fetchInProgress = false; _fetchedDocs = null; _fetchIndex = -1; _entity = null; _searcher = null; _attributes = null; _indexAttributes = null; } @Override public void closeChannel() { _open = false; } @Override public NSArray describeResults() { return _attributes; } @Override public NSArray describeTableNames() { return NSArray.EmptyArray; } @Override public EOModel describeModelWithTableNames(NSArray anArray) { return null; } @Override public void evaluateExpression(EOSQLExpression anExpression) { throw new UnsupportedOperationException("ERLuceneAdaptorChannel.evaluateExpression"); } @Override public void executeStoredProcedure(EOStoredProcedure aStoredProcedure, NSDictionary someValues) { throw new UnsupportedOperationException("ERLuceneAdaptorChannel.executeStoredProcedure"); } @Override public NSMutableDictionary fetchRow() { if (!_fetchInProgress) { return null; } NSMutableDictionary row = null; if (hasMoreRowsToReturn()) { try { int docId = _fetchedDocs.scoreDocs[_fetchIndex++].doc; Document doc = searcher().doc(docId); EOClassDescription cd = EOClassDescription.classDescriptionForEntityName(_entity.name()); NSMutableDictionary dict = cd._newDictionaryForProperties(); for (IndexAttribute attr : _indexAttributes) { String name = attr.attribute().name(); String columnName = attr.attribute().columnName(); Field field = doc.getField(columnName); Object value = null; if (field != null) { if (field.isBinary()) { value = new NSData(field.getBinaryValue()); } else { String stringValue = field.stringValue(); value = attr.asEOFValue(stringValue); } dict.setObjectForKey(value, name); } else { dict.setObjectForKey(NSKeyValueCoding.NullValue, name); } } row = dict; } catch (CorruptIndexException e) { throw new ERLuceneAdaptorException("Failed to fetch row: " + e.getMessage(), e); } catch (IOException e) { throw new ERLuceneAdaptorException("Failed to fetch row: " + e.getMessage(), e); } } _fetchInProgress = hasMoreRowsToReturn(); return row; } private boolean hasMoreRowsToReturn() { return _fetchIndex < _fetchedDocs.totalHits; } @Override public boolean isFetchInProgress() { return _fetchInProgress; } @Override public boolean isOpen() { return _open; } @Override public void openChannel() { if (!_open) { _open = true; } } @Override public NSDictionary returnValuesForLastStoredProcedureInvocation() { throw new UnsupportedOperationException("ERLuceneAdaptorChannel.returnValuesForLastStoredProcedureInvocation"); } @Override public void selectAttributes(NSArray attributesToFetch, EOFetchSpecification fs, boolean shouldLock, EOEntity entity) { if (entity == null) { throw new IllegalArgumentException("null entity."); } if (attributesToFetch == null) { throw new IllegalArgumentException("null attributes."); } _fetchInProgress = true; _entity = entity; _searcher = null; setAttributesToFetch(attributesToFetch); try { _fetchIndex = 0; IndexSearcher searcher = searcher(); Query query = null; Sort sort = null; if(fs.hints() != null) { query = (Query) fs.hints().objectForKey(ERLuceneAdaptor.QUERY_HINTS); sort = (Sort) fs.hints().objectForKey(ERLuceneAdaptor.SORT_HINTS); } if(query == null) { query = queryForQualifier(fs.qualifier(), entity); } if(sort == null) { sort = sortForSortOrderings(fs.sortOrderings()); } int fetchLimit = fs.fetchLimit() > 0 ? fs.fetchLimit() : Integer.MAX_VALUE; if (sort != null) { _fetchedDocs = searcher.search(query, null, fetchLimit, sort); } else { _fetchedDocs = searcher.search(query, fetchLimit); } } catch (EOGeneralAdaptorException e) { cancelFetch(); throw e; } catch (Throwable e) { cancelFetch(); throw new ERLuceneAdaptorException("Failed to fetch '" + entity.name() + "' with fetch specification '" + fs + "': " + e.getMessage(), e); } } @SuppressWarnings("unchecked") @Override public void setAttributesToFetch(NSArray attributesToFetch) { if (attributesToFetch == null) { throw new IllegalArgumentException("ERLuceneAdaptorChannel.setAttributesToFetch: null attributes."); } _attributes = attributesToFetch; _indexAttributes = attributesForAttributes(attributesToFetch); } private Term termForDocument(Document doc, EOEntity entity) { for (IndexAttribute info : attributesForAttributes(entity.primaryKeyAttributes())) { String name = info.columnName(); String value = doc.get(name); Term term = new Term(name); term = term.createTerm(value); return term; } return null; } private void fillWithDictionary(Document doc, NSDictionary row, EOEntity entity) { for (IndexAttribute info : attributesForEntity(entity)) { Object value = row.objectForKey(info.attribute().columnName()); if (value != null) { if(value == NSKeyValueCoding.NullValue) { value = null; } Field field = info.valueToField(doc, value); if (field != null) { doc.add(field); } } } } @Override public int updateValuesInRowsDescribedByQualifier(NSDictionary updatedRow, EOQualifier qualifier, EOEntity entity) { try { IndexSearcher searcher = searcher(); Query query = queryForQualifier(qualifier, entity); TopDocs fetchedDocs = searcher.search(query, Integer.MAX_VALUE); int count = fetchedDocs.totalHits; for (int i = 0; i < count; i++) { int docId = fetchedDocs.scoreDocs[i].doc; Document doc = searcher.doc(docId); fillWithDictionary(doc, updatedRow, entity); Term term = termForDocument(doc, entity); writer().updateDocument(term, doc); } return count; } catch (EOGeneralAdaptorException e) { throw e; } catch (Throwable e) { throw new ERLuceneAdaptorException("Failed to update '" + entity.name() + "' row " + updatedRow + " with qualifier " + qualifier + ": " + e.getMessage(), e); } } @Override public void insertRow(NSDictionary row, EOEntity entity) { try { Document doc = new Document(); fillWithDictionary(doc, row, entity); doc.add(new Field(EXTERNAL_NAME_KEY, entity.externalName(), Store.NO, Index.NOT_ANALYZED)); writer().addDocument(doc); } catch (EOGeneralAdaptorException e) { throw e; } catch (Throwable e) { throw new ERLuceneAdaptorException("Failed to insert '" + entity.name() + "' with row " + row + ": " + e.getMessage(), e); } } @Override public int deleteRowsDescribedByQualifier(EOQualifier qualifier, EOEntity entity) { try { IndexSearcher searcher = searcher(); Query query = queryForQualifier(qualifier, entity); TopDocs fetchedDocs = searcher.search(query, Integer.MAX_VALUE); int count = fetchedDocs.totalHits; for (int i = 0; i < count; i++) { int docId = fetchedDocs.scoreDocs[i].doc; Document doc = searcher.doc(docId); Term term = termForDocument(doc, entity); writer().deleteDocuments(term); } return count; } catch (EOGeneralAdaptorException e) { throw e; } catch (Throwable e) { throw new ERLuceneAdaptorException("Failed to delete '" + entity.name() + "' with qualifier " + qualifier + ": " + e.getMessage(), e); } } /** * Convenience method to create a Lucene query from an EOF qualifier. * @param qualifier * @param entity */ public static Query queryForQualifier(EOQualifier qualifier, EOEntity entity) { if(qualifier == null) { return new TermQuery(new Term(EXTERNAL_NAME_KEY, entity.externalName())); } LuceneQualifierTraversal traverser = new LuceneQualifierTraversal(entity); traverser.traverse(qualifier, true); Query query = traverser.query(); return query; } /** * Convenience method to create a Lucene sort from an EOF sort ordering array. * @param sortOrderings */ public static Sort sortForSortOrderings(NSArray<EOSortOrdering> sortOrderings) { Sort sort = null; if (sortOrderings != null && sortOrderings.count() > 0) { NSMutableArray<SortField> fields = new NSMutableArray<SortField>(sortOrderings.count()); for (EOSortOrdering s : sortOrderings) { String name = s.key(); NSSelector sel = s.selector(); boolean reverse = sel.equals(EOSortOrdering.CompareDescending) || sel.equals(EOSortOrdering.CompareCaseInsensitiveDescending); SortField sf = new SortField(name, SortField.DOC, reverse); fields.addObject(sf); } if (fields.count() > 0) { sort = new Sort(); sort.setSort(fields.toArray(new SortField[] {})); } } return sort; } }