Adding solr indexing prototype

This commit is contained in:
briancaruso 2011-04-11 17:21:38 +00:00
parent 9bfa6acbd5
commit 543c1cd945
9 changed files with 442 additions and 58 deletions

View file

@ -0,0 +1,45 @@
package edu.cornell.mannlib.vitro.webapp.search.solr;
import org.apache.solr.common.SolrDocument;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
/**
 * Translates an {@link Individual} into a Solr document in two steps: the
 * supplied {@link Entity2LuceneDoc} builds a Lucene document, and a
 * {@link LuceneDocToSolrDoc} converts that document for Solr.
 */
public class IndividualToSolrDocument implements Obj2DocIface {

    /** Converts Lucene documents to Solr documents and Solr hits back to objects. */
    protected LuceneDocToSolrDoc luceneToSolr;

    /** Builds the intermediate Lucene document for an individual. */
    protected Entity2LuceneDoc entityToLucene;

    /**
     * @param e2d translator used to build the intermediate Lucene document
     */
    public IndividualToSolrDocument(Entity2LuceneDoc e2d){
        entityToLucene = e2d;
        luceneToSolr = new LuceneDocToSolrDoc();
    }

    /**
     * @return true when {@code obj} is a non-null {@link Individual}
     */
    @Override
    public boolean canTranslate(Object obj) {
        // instanceof is already false for null, so no separate null check.
        return obj instanceof Individual;
    }

    /**
     * @return true when {@code result} is a non-null {@link SolrDocument}
     */
    @Override
    public boolean canUnTranslate(Object result) {
        return result instanceof SolrDocument;
    }

    /**
     * Not implemented for Solr.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Object getIndexId(Object obj) {
        throw new UnsupportedOperationException(
                "IndividualToSolrDocument.getIndexId() is unimplemented");
    }

    /**
     * Builds the Lucene document for {@code obj} and converts it for Solr.
     *
     * @param obj the object to translate
     * @return the converted document, or null when the Lucene translation
     *         produced no document
     * @throws IndexingException if either translation step fails
     */
    @Override
    public Object translate(Object obj) throws IndexingException {
        Object luceneDoc = entityToLucene.translate( obj );
        if( luceneDoc == null ){
            // Nothing to convert: hand the null on to the caller instead of
            // passing it to the Lucene-to-Solr step.
            return null;
        }
        return luceneToSolr.translate( luceneDoc );
    }

    /**
     * Delegates to {@link LuceneDocToSolrDoc#unTranslate(Object)}.
     */
    @Override
    public Object unTranslate(Object result) {
        return luceneToSolr.unTranslate( result );
    }
}

View file

@ -0,0 +1,60 @@
package edu.cornell.mannlib.vitro.webapp.search.solr;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
/**
 * Translates a Lucene {@link Document} into a {@link SolrInputDocument} for
 * indexing, and a {@link SolrDocument} search hit back into an
 * {@link Individual}.
 */
public class LuceneDocToSolrDoc implements Obj2DocIface {

    /**
     * @return true when {@code obj} is a non-null Lucene {@link Document}
     */
    @Override
    public boolean canTranslate(Object obj) {
        // instanceof is already false for null, so no separate null check.
        return obj instanceof Document;
    }

    /**
     * @return true when {@code result} is a non-null {@link SolrDocument}
     */
    @Override
    public boolean canUnTranslate(Object result) {
        return result instanceof SolrDocument;
    }

    /**
     * This method isn't useful for solr.
     *
     * @return always null
     */
    @Override
    public Object getIndexId(Object obj) {
        return null;
    }

    /**
     * Copies every field of a Lucene document into a new
     * {@link SolrInputDocument}, using each field's name and string value.
     *
     * @param obj a Lucene {@link Document}, or null
     * @return the new {@link SolrInputDocument}, or null when {@code obj} is null
     * @throws ClassCastException if {@code obj} is not a {@link Document}
     */
    @Override
    public Object translate(Object obj) throws IndexingException {
        if( obj == null ){
            // No source document means no Solr document; avoid the
            // NullPointerException that getFields() would otherwise raise.
            return null;
        }
        Document luceneDoc = (Document)obj;
        SolrInputDocument solrDoc = new SolrInputDocument();
        for( Object f : luceneDoc.getFields()){
            Field field = (Field)f;
            // NOTE(review): stringValue() may be null for fields that do not
            // hold a string - confirm whether such fields can occur here.
            solrDoc.addField( field.name(), field.stringValue() );
        }
        return solrDoc;
    }

    /**
     * Builds an {@link Individual} from a Solr search hit. Only the URI is
     * set, read from the {@code Entity2LuceneDoc.term.URI} field of the hit.
     *
     * @param result a {@link SolrDocument}
     * @return the new individual, or null when {@code result} is null or not
     *         a {@link SolrDocument}
     */
    @Override
    public Object unTranslate(Object result) {
        if( !(result instanceof SolrDocument) ){
            return null;
        }
        SolrDocument hit = (SolrDocument)result;
        String id = (String) hit.getFieldValue(Entity2LuceneDoc.term.URI);
        Individual ind = new IndividualImpl();
        ind.setURI(id);
        return ind;
    }
}

View file

@ -0,0 +1,163 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface;
/**
 * {@link IndexerIface} implementation that sends documents to a Solr server.
 *
 * A run is bracketed by {@link #startIndexing()} and {@link #endIndexing()};
 * {@link #index(Individual, boolean)} may only be called in between. Within
 * one run each URI is sent to the server at most once.
 */
public class SolrIndexer implements IndexerIface {
    private final static Log log = LogFactory.getLog(SolrIndexer.class);

    /** Server that receives the documents. */
    protected SolrServer server;

    /** True between startIndexing() and endIndexing(). */
    protected boolean indexing;

    /** Translators tried, in order, for every individual. */
    protected List<Obj2DocIface> obj2DocList;

    /** URIs already handled in the current run; recreated by startIndexing(). */
    protected HashSet<String> urisIndexed;

    /**
     * @param server Solr server to send documents to
     * @param o2d translators used to turn individuals into Solr documents
     */
    public SolrIndexer( SolrServer server, List<Obj2DocIface> o2d){
        this.server = server;
        this.obj2DocList = o2d;
    }

    /**
     * Translates the individual with every translator that accepts it and
     * adds each resulting document to the Solr server. A null individual, or
     * one whose URI was already handled in this run, is ignored.
     *
     * @param ind the individual to index; null is ignored
     * @param newDoc currently unused
     * @throws IndexingException if startIndexing() has not been called, or
     *         if sending a document to the server fails
     */
    @Override
    public synchronized void index(Individual ind, boolean newDoc) throws IndexingException {
        if( ! indexing )
            throw new IndexingException("SolrIndexer: must call " +
                    "startIndexing() before index().");

        if( ind == null ){
            log.debug("Individual to index was null, ignoring.");
            // Actually ignore it; falling through would dereference null.
            return;
        }

        String uri = ind.getURI();
        if( urisIndexed.contains(uri) ){
            log.debug("already indexed " + uri );
            return;
        }
        urisIndexed.add(uri);
        log.debug("indexing " + uri);

        try{
            for( Obj2DocIface obj2doc : getObj2DocList() ){
                if( !obj2doc.canTranslate(ind) ){
                    continue;
                }
                SolrInputDocument solrDoc = (SolrInputDocument) obj2doc.translate(ind);
                if( solrDoc != null ){
                    //sending each doc individually is inefficient
                    Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
                    docs.add( solrDoc );
                    server.add( docs );
                }else{
                    // TODO: the document is not yet deleted from the index,
                    // this only logs.
                    log.debug("removing from index " + uri);
                }
            }
        } catch (IOException ex) {
            throw new IndexingException(ex.getMessage());
        } catch (SolrServerException ex) {
            throw new IndexingException(ex.getMessage());
        }
    }

    @Override
    public boolean isIndexing() {
        return indexing;
    }

    @Override
    public void prepareForRebuild() throws IndexingException {
        // TODO Auto-generated method stub
    }

    @Override
    public void removeFromIndex(Individual ind) throws IndexingException {
        // TODO Auto-generated method stub
    }

    /**
     * Waits until no other indexing run is active, then starts a new run
     * with an empty set of indexed URIs.
     */
    @Override
    public synchronized void startIndexing() throws IndexingException {
        while( indexing ){ //wait for indexing to end.
            log.debug("SolrIndexer.startIndexing() waiting...");
            try{
                wait();
            } catch(InterruptedException ignored){
                // Deliberately ignored: the loop re-checks the flag and keeps
                // waiting until the active run calls endIndexing().
            }
        }
        log.debug("Starting to index");
        indexing = true;
        urisIndexed = new HashSet<String>();
        notifyAll();
    }

    /**
     * Appends a translator; null is ignored.
     */
    public synchronized void addObj2Doc(Obj2DocIface o2d) {
        if (o2d != null)
            obj2DocList.add(o2d);
    }

    public synchronized List<Obj2DocIface> getObj2DocList() {
        return obj2DocList;
    }

    /**
     * Ends the run exactly as {@link #endIndexing()} does.
     */
    @Override
    public void abortIndexingAndCleanUp() {
        endIndexing();
    }

    /**
     * Commits and then optimizes the Solr index, marks the run as finished
     * and wakes any thread waiting in startIndexing(). Failures of commit or
     * optimize are logged, not propagated, so the run always ends.
     */
    @Override
    public synchronized void endIndexing() {
        try {
            server.commit();
        } catch (Exception e) {
            log.error("Could not commit to solr server", e);
        }
        try {
            server.optimize();
        } catch (Exception e) {
            log.error("Could not optimize solr server", e);
        }
        indexing = false;
        notifyAll();
    }

    @Override
    public long getModified() {
        // TODO Auto-generated method stub
        return 0;
    }

    public boolean isIndexEmpty() {
        // TODO Auto-generated method stub
        return false;
    }
}

View file

@ -0,0 +1,132 @@
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import javax.servlet.ServletContext;
import javax.servlet.ServletContextEvent;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import com.hp.hpl.jena.ontology.OntModel;
import edu.cornell.mannlib.vitro.webapp.beans.BaseResourceBean.RoleLevel;
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
/**
 * Servlet context listener that wires up Solr indexing at startup: it
 * creates the HTTP connection to the Solr server, builds the
 * individual-to-Solr-document translator, the {@link SolrIndexer} and the
 * {@link IndexBuilder}, registers a listener for model changes, and runs an
 * index rebuild when one was requested at startup.
 */
public class SolrSetup implements javax.servlet.ServletContextListener{
    private static final Log log = LogFactory.getLog(SolrSetup.class.getName());

    /** Servlet context attribute under which the Solr server is stored. */
    protected static final String LOCAL_SOLR_SERVER = "vitro.local.solr.server";

    /**
     * Performs the setup described on the class. Does nothing when startup
     * was aborted or when no Solr URL is configured. Any failure is logged
     * and does not propagate.
     */
    @Override
    public void contextInitialized(ServletContextEvent sce) {
        if (AbortStartup.isStartupAborted(sce.getServletContext())) {
            return;
        }

        try {
            ServletContext context = sce.getServletContext();

            /* setup the http connection with the solr server */
            final String solrUrlProperty = "vitro.local.solr.url";
            String solrServerUrl = ConfigurationProperties.getBean(sce).getProperty(solrUrlProperty);
            if( solrServerUrl == null ){
                log.error("Could not find " + solrUrlProperty + " in deploy.properties.  "+
                        "Vitro application needs a URL of a solr server that it can use to index its data. " +
                        "It should be something like http://localhost:${port}" + context.getContextPath() + "solr"
                        );
                return;
            }

            CommonsHttpSolrServer server = new CommonsHttpSolrServer( solrServerUrl );
            server.setSoTimeout(1000);  // socket read timeout
            server.setConnectionTimeout(100);
            server.setDefaultMaxConnectionsPerHost(100);
            server.setMaxTotalConnections(100);
            server.setMaxRetries(1);
            context.setAttribute(LOCAL_SOLR_SERVER, server);

            /* setup the individual to solr doc translation */
            //first we need a ent2luceneDoc translator
            OntModel displayOntModel = (OntModel) context.getAttribute("displayOntModel");
            Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc(
                    new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
                    new IndividualProhibitedFromSearch(context) );
            IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc );
            List<Obj2DocIface> o2d = new ArrayList<Obj2DocIface>();
            o2d.add(indToSolrDoc);

            /* setup solr indexer */
            SolrIndexer solrIndexer = new SolrIndexer(server, o2d);
            if( solrIndexer.isIndexEmpty() ){
                log.info("solr index is empty, requesting rebuild");
                context.setAttribute(LuceneSetup.INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE);
            }

            // This is where the builder gets the list of places to try to
            // get objects to index. It is filtered so that non-public text
            // does not get into the search index.
            WebappDaoFactory wadf = (WebappDaoFactory) context.getAttribute("webappDaoFactory");
            VitroFilters vf = VitroFilterUtils.getDisplayFilterByRoleLevel(RoleLevel.PUBLIC, wadf);
            wadf = new WebappDaoFactoryFiltering(wadf, vf);
            List<ObjectSourceIface> sources = new ArrayList<ObjectSourceIface>();
            sources.add(wadf.getIndividualDao());

            IndexBuilder builder = new IndexBuilder(context, solrIndexer, sources);
            // add the builder to the servlet context so we can access it
            // later in the webapp.
            context.setAttribute(IndexBuilder.class.getName(), builder);

            // set up listeners so search index builder is notified of changes to model
            SearchReindexingListener srl = new SearchReindexingListener(builder);
            ModelContext.registerListenerForChanges(context, srl);

            // Boolean.TRUE.equals() is false for null and for non-Boolean values.
            Object rebuildRequested = context.getAttribute(LuceneSetup.INDEX_REBUILD_REQUESTED_AT_STARTUP);
            if( Boolean.TRUE.equals(rebuildRequested) ){
                log.info("Rebuild of solr index required before startup.");
                builder.doIndexRebuild();
                // Block startup until the rebuild has finished.
                int n = 0;
                while( builder.isReindexRequested() || builder.isIndexing() ){
                    n++;
                    if( n % 20 == 0 ) //output message every 10 sec.
                        log.info("Still rebuilding solr index");
                    Thread.sleep(500);
                }
            }

            log.info("Setup of Solr index completed.");
        } catch (Throwable e) {
            log.error("could not setup local solr server",e);
        }
    }

    /** Nothing to clean up. */
    @Override
    public void contextDestroyed(ServletContextEvent sce) {
    }

    /**
     * @return the Solr server stored in the servlet context by
     *         contextInitialized(), or null when none was stored
     */
    public static SolrServer getSolrServer(ServletContext ctx){
        return (SolrServer) ctx.getAttribute(LOCAL_SOLR_SERVER);
    }
}