Improving SearchReindexingListener so it does a better job with rdf:type changes. NIHVIVO-724

This commit is contained in:
bdc34 2010-07-02 19:13:20 +00:00
parent fc8e41feec
commit d470d49c48
6 changed files with 146 additions and 115 deletions

View file

@ -6,140 +6,156 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.servlet.ServletContext;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelChangedListener;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.shared.Lock;
import edu.cornell.mannlib.vitro.webapp.dao.jena.event.EditEvent;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
public class SearchReindexingListener implements ModelChangedListener {
private ServletContext context;
/**
* This class is thread safe.
*/
public class SearchReindexingListener implements ModelChangedListener {
private HashSet<String> changedUris;
private IndexBuilder indexBuilder;
public SearchReindexingListener(OntModel ontModel, ServletContext sc) {
this.context = sc;
public SearchReindexingListener(IndexBuilder indexBuilder) {
if(indexBuilder == null )
throw new IllegalArgumentException("Constructor parameter indexBuilder must not be null");
this.indexBuilder = indexBuilder;
this.changedUris = new HashSet<String>();
}
/**
 * Reacts to events sent through the model. Only an {@link EditEvent} that
 * marks the end of an edit has any effect: the URIs collected so far are
 * passed to the IndexBuilder stored in the servlet context and the builder
 * is run on a new thread. When no builder is found in the context the
 * collected URIs are discarded.
 *
 * @param arg0 the model the event was sent on (not used)
 * @param arg1 the event object; anything other than an EditEvent is ignored
 */
public void notifyEvent(Model arg0, Object arg1) {
    if ( !(arg1 instanceof EditEvent) ) {
        return;
    }
    if ( ((EditEvent) arg1).getBegin() ) {
        // beginning of an edit: nothing to do yet
        return;
    }

    // the event is the end of an edit
    log.debug("doing search index build");
    IndexBuilder builder = (IndexBuilder) context.getAttribute(IndexBuilder.class.getName());
    if ( builder == null ) {
        log.debug("Could not get IndexBuilder from servlet context, cannot create index for full text seraching.");
        getAndClearChangedUris(); // drop the pending changes because they cannot be indexed
        return;
    }

    for ( String changedUri : getAndClearChangedUris() ) {
        builder.addToChangedUris(changedUri);
    }
    new Thread(builder).start();
}
/**
 * Reports whether a predicate is "normal", i.e. not one of the filtered
 * predicates. Currently rdf:type is the only predicate filtered out; making
 * the filter configurable at run time may be a useful improvement.
 *
 * @param p the predicate to test, may be null
 * @return false when {@code p} is null or rdf:type, true otherwise
 */
private boolean isNormalPredicate(Property p) {
    return p != null && !RDF.type.equals( p );
}
/**
 * Hands the set of URIs collected so far to the caller and installs a new,
 * empty set for later changes. Synchronized on this listener, like
 * addChange, so the swap cannot interleave with a change being recorded.
 *
 * @return the URIs collected before this call
 */
private synchronized Set<String> getAndClearChangedUris(){
    log.debug("getting and clearing changed URIs.");
    final Set<String> drained = changedUris;
    changedUris = new HashSet<String>();
    return drained;
}
private synchronized void addChange(Statement stmt){
if( stmt == null ) return;
if( stmt.getSubject().isURIResource() ){
changedUris.add( stmt.getSubject().getURI());
//changedUris.add( stmt.getSubject().getURI());
indexBuilder.addToChangedUris(stmt.getSubject().getURI());
log.debug(stmt.getSubject().getURI());
}
if( stmt.getObject().isURIResource() && isNormalPredicate( stmt.getPredicate() ) ){
changedUris.add( ((Resource) stmt.getObject().as(Resource.class)).getURI() );
if( stmt.getObject().isURIResource() ){
//changedUris.add( ((Resource) stmt.getObject().as(Resource.class)).getURI() );
indexBuilder.addToChangedUris(((Resource) stmt.getObject()).getURI());
log.debug(((Resource) stmt.getObject().as(Resource.class)).getURI());
}
}
// private synchronized Set<String> getAndClearChangedUris(){
// log.debug("Getting and clearing changed URIs.");
// Set<String> out = changedUris;
// changedUris = new HashSet<String>();
// return out;
// }
/**
 * Runs the index builder on a newly created thread. Changed URIs are passed
 * to the builder directly in addChange as statements are observed, so
 * nothing has to be copied over at this point.
 */
private void doAyncIndex(){
    final Thread indexThread = new Thread(indexBuilder);
    indexThread.start();
}
/**
 * Reacts to events sent through the model. An {@link EditEvent} that marks
 * the end of an edit starts an asynchronous index build; the beginning of
 * an edit and every other kind of event are ignored.
 *
 * @param arg0 the model the event was sent on (not used)
 * @param arg1 the event object; may be null, in which case it is ignored
 */
@Override
public void notifyEvent(Model arg0, Object arg1) {
    if ( arg1 instanceof EditEvent ) {
        EditEvent editEvent = (EditEvent) arg1;
        if ( !editEvent.getBegin() ) { // editEvent is the end of an edit
            log.debug("Doing search index build at end of EditEvent");
            doAyncIndex();
        }
    } else if ( arg1 == null ) {
        // instanceof is false for null, so a null event ends up here;
        // calling getClass() on it would throw a NullPointerException.
        log.debug("ignoring null event");
    } else {
        log.debug("ignoring event " + arg1.getClass().getName() + " " + arg1 );
    }
}
/**
 * Listener callback for a single added statement; passes the statement to
 * addChange.
 *
 * @param stmt the statement that was added
 */
@Override
public void addedStatement(Statement stmt) {
addChange(stmt);
// Indexing is not started per statement; notifyEvent starts it at the end of an EditEvent.
//doAyncIndex();
}
/**
 * Listener callback for an array of added statements; each element is
 * passed to addChange.
 *
 * @param arg0 the statements that were added
 */
public void addedStatements(Statement[] arg0) {
    for (int i = 0; i < arg0.length; i++) {
        addChange(arg0[i]);
    }
}
/**
 * Listener callback for a list of added statements; each element is passed
 * to addChange.
 *
 * @param arg0 raw list whose elements are treated as Statements
 */
public void addedStatements(List arg0) {
    final List<Statement> added = (List<Statement>) arg0;
    for (Statement added1 : added) {
        addChange(added1);
    }
}
/**
 * Listener callback for an iterator of added statements; every remaining
 * statement is passed to addChange. A null iterator is ignored.
 *
 * @param arg0 iterator over the added statements, may be null
 */
public void addedStatements(StmtIterator arg0) {
    if ( arg0 == null ) {
        return;
    }
    while ( arg0.hasNext() ) {
        Statement next = arg0.nextStatement();
        addChange(next);
    }
}
/**
 * Listener callback for a model of added statements; lists the model's
 * statements and handles them through the StmtIterator overload. A null
 * model is ignored.
 *
 * @param arg0 model holding the added statements, may be null
 */
public void addedStatements(Model arg0) {
    if ( arg0 == null ) {
        return;
    }
    StmtIterator added = arg0.listStatements();
    addedStatements(added);
}
/**
 * Listener callback for a single removed statement; a removal is passed to
 * addChange in the same way as an addition.
 *
 * @param stmt the statement that was removed
 */
@Override
public void removedStatement(Statement stmt){
addChange(stmt);
}
/**
 * Listener callback for an array of removed statements; each element is
 * passed to addChange.
 *
 * @param arg0 the statements that were removed
 */
public void removedStatements(Statement[] arg0) {
    for (int i = 0; i < arg0.length; i++) {
        addChange(arg0[i]);
    }
}
/**
 * Listener callback for a list of removed statements; each element is
 * passed to addChange.
 *
 * @param arg0 raw list whose elements are treated as Statements
 */
public void removedStatements(List arg0) {
    final List<Statement> removed = (List<Statement>) arg0;
    for (Statement removed1 : removed) {
        addChange(removed1);
    }
}
/**
 * Listener callback for an iterator of removed statements; every remaining
 * statement is passed to addChange. A null iterator is ignored.
 *
 * @param arg0 iterator over the removed statements, may be null
 */
public void removedStatements(StmtIterator arg0) {
    if ( arg0 == null ) {
        return;
    }
    while ( arg0.hasNext() ) {
        Statement next = arg0.nextStatement();
        addChange(next);
    }
}
/**
 * Listener callback for a model of removed statements; lists the model's
 * statements and handles them through the StmtIterator overload. A null
 * model is ignored. No index build is started here.
 *
 * @param arg0 model holding the removed statements, may be null
 */
public void removedStatements(Model arg0) {
    if ( arg0 == null ) {
        return;
    }
    StmtIterator removed = arg0.listStatements();
    removedStatements(removed);
}
/** Commons-logging logger for this listener, named after the class. */
private static final Log log = LogFactory.getLog(SearchReindexingListener.class.getName());
/**
 * Listener callback for an array of added statements; each element is
 * passed to addChange. No index build is started here.
 *
 * @param arg0 the statements that were added
 */
@Override
public void addedStatements(Statement[] arg0) {
    for (int i = 0; i < arg0.length; i++) {
        addChange(arg0[i]);
    }
}
/**
 * Listener callback for a list of added statements; each element is passed
 * to addChange. No index build is started here.
 *
 * @param arg0 the statements that were added
 */
@Override
public void addedStatements(List<Statement> arg0) {
    for (Statement added : arg0) {
        addChange(added);
    }
}
/**
 * Listener callback for an iterator of added statements. Every remaining
 * statement is passed to addChange and the iterator is closed afterwards,
 * even if recording a change fails. A null iterator is ignored. No index
 * build is started here.
 *
 * @param arg0 iterator over the added statements, may be null
 */
@Override
public void addedStatements(StmtIterator arg0) {
    if ( arg0 == null ) {
        // Without this guard both the loop and the close() in the finally
        // block would throw a NullPointerException.
        return;
    }
    try {
        while ( arg0.hasNext() ) {
            addChange(arg0.nextStatement());
        }
    } finally {
        arg0.close();
    }
}
/**
 * Listener callback for a model of added statements. The model's statements
 * are listed inside a read critical section and each one is passed to
 * addChange; the iterator is closed and the critical section left even if
 * recording a change fails. A null model is ignored. No index build is
 * started here.
 *
 * @param m model holding the added statements, may be null
 */
@Override
public void addedStatements(Model m) {
    if ( m == null ) {
        // Nothing to record; entering the critical section on null would throw.
        return;
    }
    m.enterCriticalSection(Lock.READ);
    StmtIterator it = null;
    try {
        it = m.listStatements();
        while ( it.hasNext() ) {
            addChange(it.nextStatement());
        }
    } finally {
        if ( it != null ) {
            it.close();
        }
        m.leaveCriticalSection();
    }
}
/**
 * Listener callback for an array of removed statements; delegates to the
 * matching addedStatements overload.
 *
 * @param arg0 the statements that were removed
 */
@Override
public void removedStatements(Statement[] arg0) {
//same as add stmts
this.addedStatements(arg0);
}
/**
 * Listener callback for a list of removed statements; delegates to the
 * matching addedStatements overload.
 *
 * @param arg0 the statements that were removed
 */
@Override
public void removedStatements(List<Statement> arg0) {
//same as add
this.addedStatements(arg0);
}
/**
 * Listener callback for an iterator of removed statements; delegates to the
 * matching addedStatements overload, which also closes the iterator.
 *
 * @param arg0 iterator over the removed statements
 */
@Override
public void removedStatements(StmtIterator arg0) {
//same as add
this.addedStatements(arg0);
}
/**
 * Listener callback for a model of removed statements; delegates to the
 * matching addedStatements overload.
 *
 * @param arg0 model holding the removed statements
 */
@Override
public void removedStatements(Model arg0) {
//same as add
this.addedStatements(arg0);
}
}

View file

@ -3,6 +3,7 @@
package edu.cornell.mannlib.vitro.webapp.search.indexing;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@ -46,7 +47,7 @@ public class IndexBuilder implements Runnable {
ProhibitedFromSearch classesProhibitedFromSearch = null;
long lastRun = 0;
List<String> changedUris = null;
Collection<String> changedUris = null;
public static final boolean UPDATE_DOCS = false;
public static final boolean NEW_DOCS = true;
@ -60,7 +61,7 @@ public class IndexBuilder implements Runnable {
this.sourceList = sources;
this.context = context;
changedUris = new LinkedList<String>();
changedUris = new HashSet<String>();
}
public void addObjectSource(ObjectSourceIface osi) {
@ -210,9 +211,9 @@ public class IndexBuilder implements Runnable {
indexForSource((Iterator)obj, newDocs);
}
} catch (IndexingException ex) {
log.error("\t" + ex.getMessage(),ex);
log.error(ex,ex);
} catch (Exception e) {
log.error("\t"+e.getMessage(),e);
log.error(e,e);
} finally {
indexer.endIndexing();
}

View file

@ -90,15 +90,18 @@ public class Entity2LuceneDoc implements Obj2DocIface{
//DocId
String id = ent.getURI();
if( id == null )
if( id == null ){
log.debug("cannot translate bnodes");
throw new IndexingException("Not indexing bnodes");
}
doc.add( new Field(term.DOCID, entClassName + id,
Field.Store.YES, Field.Index.NOT_ANALYZED));
//vitro Id
doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED));
log.debug( id );
//java class
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED));
@ -124,6 +127,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{
//rdf:type and ClassGroup
List<VClass> vclasses = ent.getVClasses(false);
for( VClass clz : vclasses){
log.debug( id + " as type " + clz.getURI() );
//document boost for given classes
if( clz.getSearchBoost() != null )
doc.setBoost( doc.getBoost() + clz.getSearchBoost() );

View file

@ -193,13 +193,19 @@ public class LuceneIndexer implements IndexerIface {
Iterator<Obj2DocIface> it = getObj2DocList().iterator();
while (it.hasNext()) {
Obj2DocIface obj2doc = (Obj2DocIface) it.next();
if (obj2doc.canTranslate(ind)) {
if( !newDoc ){
writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
if (obj2doc.canTranslate(ind)) {
Document d = (Document) obj2doc.translate(ind);
if( d != null){
if( !newDoc ){
writer.updateDocument((Term)obj2doc.getIndexId(ind), d);
log.debug("updated " + ind.getName() + " " + ind.getURI());
}else{
writer.addDocument(d);
log.debug("added " + ind.getName() + " " + ind.getURI());
}
}else{
log.debug("could not translate " + ind.getURI());
}
Document d = (Document) obj2doc.translate(ind);
if( d != null)
writer.addDocument(d);
}
}
} catch (IOException ex) {
@ -220,7 +226,8 @@ public class LuceneIndexer implements IndexerIface {
while (it.hasNext()) {
Obj2DocIface obj2doc = (Obj2DocIface) it.next();
if (obj2doc.canTranslate(ind)) {
writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
log.debug("deleted " + ind.getName() + " " + ind.getURI());
}
}
} catch (IOException ex) {

View file

@ -121,9 +121,11 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
//set up listeners so search index builder is notified of changes to model
OntModel baseOntModel = (OntModel)sce.getServletContext().getAttribute("baseOntModel");
OntModel jenaOntModel = (OntModel)sce.getServletContext().getAttribute("jenaOntModel");
SearchReindexingListener srl = new SearchReindexingListener(baseOntModel, sce.getServletContext());
OntModel inferenceModel = (OntModel) sce.getServletContext().getAttribute("inferenceOntModel");
SearchReindexingListener srl = new SearchReindexingListener(builder);
baseOntModel.getBaseModel().register(srl);
jenaOntModel.getBaseModel().register(srl);
inferenceModel.register(srl);
//set the classes that the indexBuilder ignores
OntModel displayOntModel = (OntModel)sce.getServletContext().getAttribute("displayOntModel");

View file

@ -112,7 +112,7 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
//set up listeners so search index builder is notified of changes to model
OntModel baseOntModel = (OntModel)sce.getServletContext().getAttribute("baseOntModel");
OntModel jenaOntModel = (OntModel)sce.getServletContext().getAttribute("jenaOntModel");
SearchReindexingListener srl = new SearchReindexingListener(baseOntModel, sce.getServletContext());
SearchReindexingListener srl = new SearchReindexingListener( builder );
baseOntModel.getBaseModel().register(srl);
jenaOntModel.getBaseModel().register(srl);