From cad57c2c88aa45012076f96949948fbe2df96fab Mon Sep 17 00:00:00 2001 From: bdc34 Date: Fri, 25 Jun 2010 18:20:53 +0000 Subject: [PATCH] Filtering out properties and classes from full text search NIHVIVO-643 --- .../webapp/dao/jena/IndividualDaoJena.java | 26 +++++++++++------- .../search/beans/ObjectSourceIface.java | 9 ++++--- .../webapp/search/indexing/IndexBuilder.java | 25 ++++++++++------- .../webapp/search/indexing/IndexerIface.java | 15 ++++++----- .../search/lucene/Entity2LuceneDoc.java | 22 +-------------- .../webapp/search/lucene/LuceneIndexer.java | 27 ++++++++++--------- 6 files changed, 61 insertions(+), 63 deletions(-) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/IndividualDaoJena.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/IndividualDaoJena.java index 142549d65..b2d711ade 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/IndividualDaoJena.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/IndividualDaoJena.java @@ -986,19 +986,27 @@ public class IndividualDaoJena extends JenaBaseDao implements IndividualDao { try { while (typeIt.hasNext()) { Resource typeRes = (Resource) typeIt.next(); + String type = typeRes.getURI(); // brute forcing this until we implement a better strategy - if (VitroVocabulary.PORTAL.equals(typeRes.getURI()) || - VitroVocabulary.TAB.equals(typeRes.getURI()) || - VitroVocabulary.TAB_INDIVIDUALRELATION.equals(typeRes.getURI()) || - VitroVocabulary.LINK.equals(typeRes.getURI()) || - VitroVocabulary.KEYWORD.equals(typeRes.getURI()) || - VitroVocabulary.KEYWORD_INDIVIDUALRELATION.equals(typeRes.getURI()) || - VitroVocabulary.CLASSGROUP.equals(typeRes.getURI()) || - VitroVocabulary.PROPERTYGROUP.equals(typeRes.getURI()) || - VitroVocabulary.APPLICATION.equals(typeRes.getURI())) { + if (VitroVocabulary.PORTAL.equals(type) || + VitroVocabulary.TAB.equals(type) || + VitroVocabulary.TAB_INDIVIDUALRELATION.equals(type) || + VitroVocabulary.LINK.equals(type) || + VitroVocabulary.KEYWORD.equals(type) || + VitroVocabulary.KEYWORD_INDIVIDUALRELATION.equals(type) || + VitroVocabulary.CLASSGROUP.equals(type) || + VitroVocabulary.PROPERTYGROUP.equals(type) || + VitroVocabulary.APPLICATION.equals(type)) { userVisible = false; break; } + if( OWL.ObjectProperty.getURI().equals(type) || + OWL.DatatypeProperty.getURI().equals(type) || + OWL.AnnotationProperty.getURI().equals(type) || + RDF.type.getURI().equals(type) ){ + userVisible = false; + break; + } } } finally { typeIt.close(); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ObjectSourceIface.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ObjectSourceIface.java index 7dc3650bb..41ddcf605 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ObjectSourceIface.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ObjectSourceIface.java @@ -2,13 +2,14 @@ package edu.cornell.mannlib.vitro.webapp.search.beans; -import java.util.Date; -import java.util.Iterator; +import java.util.Iterator; + +import edu.cornell.mannlib.vitro.webapp.beans.Individual; public interface ObjectSourceIface { - Iterator getAllOfThisTypeIterator(); + Iterator getAllOfThisTypeIterator(); - Iterator getUpdatedSinceIterator(long msSinceEpoc); + Iterator getUpdatedSinceIterator(long msSinceEpoc); } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java index c3997edca..d8b28f2fc 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java @@ -107,8 +107,10 @@ public class IndexBuilder implements Runnable { long since = indexer.getModified() - 60000; Iterator sources = sourceList.iterator(); - List> listOfIterators = - new LinkedList>(); + + List> listOfIterators = + new LinkedList>(); + while (sources.hasNext()) { Object obj = sources.next(); if (obj != null && obj instanceof ObjectSourceIface) @@ -219,10 +221,10 @@ public class IndexBuilder implements Runnable { * @param items * @return */ - protected void indexForSource(Iterator items , boolean newDocs){ - if( items == null ) return; - while(items.hasNext()){ - indexItem(items.next(), newDocs); + protected void indexForSource(Iterator individuals , boolean newDocs){ + if( individuals == null ) return; + while(individuals.hasNext()){ + indexItem(individuals.next(), newDocs); } } @@ -248,12 +250,17 @@ public class IndexBuilder implements Runnable { * @param item * @return */ - protected void indexItem( Object item, boolean newDoc){ + protected void indexItem( Individual ind, boolean newDoc){ try{ - indexer.index(item, newDoc); + if( ind == null ) + return; + if( ind.getVClasses() == null || ind.getVClasses().size() < 1 ) + return; + + indexer.index(ind, newDoc); }catch(Throwable ex){ log.debug("IndexBuilder.indexItem() Error indexing " - + item + "\n" +ex); + + ind + "\n" +ex); } return ; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexerIface.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexerIface.java index 25f901481..c8a9da1ee 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexerIface.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexerIface.java @@ -2,10 +2,11 @@ package edu.cornell.mannlib.vitro.webapp.search.indexing; -import java.util.List; - -import edu.cornell.mannlib.vitro.webapp.search.IndexingException; -import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; +import java.util.List; + +import edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.search.IndexingException; +import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; /** * IntexerIface is for objects that will be used by the IndexBuilder. The @@ -21,7 +22,7 @@ import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; public interface IndexerIface { public void addObj2Doc(Obj2DocIface o2d); - public List getObj2DocList(); + public List getObj2DocList(); /** * Check if indexing is currently running in a different thread. @@ -37,7 +38,7 @@ public interface IndexerIface { * @param newDoc - if true, just insert doc, if false attempt to update. * @throws IndexingException */ - public void index(Object doc, boolean newDoc)throws IndexingException; + public void index(Individual ind, boolean newDoc)throws IndexingException; /** @@ -45,7 +46,7 @@ public interface IndexerIface { * @param obj * @throws IndexingException */ - public void removeFromIndex(Object obj ) throws IndexingException; + public void removeFromIndex(Individual ind) throws IndexingException; /** * Removes all documents from the index. diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java index 31b600350..1852c4f62 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java @@ -75,27 +75,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{ private static String entClassName = Individual.class.getName(); public boolean canTranslate(Object obj) { - if(obj != null && obj instanceof Individual){ - Individual ind = (Individual)obj; - List vclasses = ind.getVClasses(); - if( vclasses == null || vclasses.size() < 1 ){ - return false; - } - for( VClass c : vclasses ){ - if( c != null) - if (VitroVocabulary.RDF_TYPE.equals(c.getURI())) - return false; - else if ( OWL.OBJECTPROPERTY.stringValue().equals((c.getURI()))) - return false; - else if ( OWL.DATATYPEPROPERTY.stringValue().equals((c.getURI()))) - return false; - else if ( OWL.ANNOTATIONPROPERTY.stringValue().equals((c.getURI()))) - return false; - } - return true; - }else{ - return false; - } + return (obj != null && obj instanceof Individual); } @SuppressWarnings("static-access") diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexer.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexer.java index 0b1ae4dcd..bc78bd824 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexer.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexer.java @@ -20,6 +20,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; +import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.search.IndexingException; import edu.cornell.mannlib.vitro.webapp.search.beans.Searcher; import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; @@ -175,14 +176,13 @@ public class LuceneIndexer implements IndexerIface { public synchronized Analyzer getAnalyzer(){ return analyzer; } - + /** * Indexes an object. startIndexing() must be called before this method * to setup the modifier. * - */ - public synchronized void index(Object obj, boolean newDoc) - throws IndexingException { + */ + public void index(Individual ind, boolean newDoc) throws IndexingException { if( ! indexing ) throw new IndexingException("LuceneIndexer: must call " + "startIndexing() before index()."); @@ -193,11 +193,11 @@ public class LuceneIndexer implements IndexerIface { Iterator it = getObj2DocList().iterator(); while (it.hasNext()) { Obj2DocIface obj2doc = (Obj2DocIface) it.next(); - if (obj2doc.canTranslate(obj)) { + if (obj2doc.canTranslate(ind)) { if( !newDoc ){ - writer.deleteDocuments((Term)obj2doc.getIndexId(obj)); + writer.deleteDocuments((Term)obj2doc.getIndexId(ind)); } - Document d = (Document) obj2doc.translate(obj); + Document d = (Document) obj2doc.translate(ind); if( d != null) writer.addDocument(d); } @@ -210,9 +210,8 @@ public class LuceneIndexer implements IndexerIface { /** * Removes a single object from index. obj is translated * using the obj2DocList. - */ - public synchronized void removeFromIndex(Object obj ) - throws IndexingException{ + */ + public void removeFromIndex(Individual ind) throws IndexingException { if( writer == null ) throw new IndexingException("LuceneIndexer: cannot delete from " + "index, IndexWriter is null."); @@ -220,8 +219,8 @@ public class LuceneIndexer implements IndexerIface { Iterator it = getObj2DocList().iterator(); while (it.hasNext()) { Obj2DocIface obj2doc = (Obj2DocIface) it.next(); - if (obj2doc.canTranslate(obj)) { - writer.deleteDocuments((Term)obj2doc.getIndexId(obj)); + if (obj2doc.canTranslate(ind)) { + writer.deleteDocuments((Term)obj2doc.getIndexId(ind)); } } } catch (IOException ex) { @@ -310,5 +309,7 @@ public class LuceneIndexer implements IndexerIface { } // The directory is now empty so delete it return dir.delete(); - } + } + + }