Adding solr indexing prototype
This commit is contained in:
parent
9bfa6acbd5
commit
543c1cd945
9 changed files with 442 additions and 58 deletions
|
@ -46,14 +46,6 @@
|
||||||
-->
|
-->
|
||||||
|
|
||||||
<schema name="example" version="1.2">
|
<schema name="example" version="1.2">
|
||||||
<!-- attribute "name" is the name of this schema and is only used for display purposes.
|
|
||||||
Applications should change this to reflect the nature of the search collection.
|
|
||||||
version="1.2" is Solr's version number for the schema syntax and semantics. It should
|
|
||||||
not normally be changed by applications.
|
|
||||||
1.0: multiValued attribute did not exist, all fields are multiValued by nature
|
|
||||||
1.1: multiValued attribute introduced, false by default
|
|
||||||
1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
|
|
||||||
-->
|
|
||||||
|
|
||||||
<types>
|
<types>
|
||||||
<!-- field type definitions. The "name" attribute is
|
<!-- field type definitions. The "name" attribute is
|
||||||
|
@ -426,36 +418,37 @@
|
||||||
when adding a document.
|
when adding a document.
|
||||||
-->
|
-->
|
||||||
|
|
||||||
<field name="id" type="string" indexed="true" stored="true" required="true" />
|
|
||||||
<field name="sku" type="textTight" indexed="true" stored="true" omitNorms="true"/>
|
|
||||||
<field name="name" type="textgen" indexed="true" stored="true"/>
|
|
||||||
<field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
|
|
||||||
<field name="manu" type="textgen" indexed="true" stored="true" omitNorms="true"/>
|
|
||||||
<field name="cat" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true" />
|
|
||||||
<field name="features" type="text" indexed="true" stored="true" multiValued="true"/>
|
|
||||||
<field name="includes" type="text" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
|
|
||||||
|
|
||||||
<field name="weight" type="float" indexed="true" stored="true"/>
|
|
||||||
<field name="price" type="float" indexed="true" stored="true"/>
|
|
||||||
<field name="popularity" type="int" indexed="true" stored="true" />
|
|
||||||
<field name="inStock" type="boolean" indexed="true" stored="true" />
|
|
||||||
|
|
||||||
|
|
||||||
<!-- Common metadata fields, named specifically to match up with
|
|
||||||
SolrCell metadata when parsing rich documents such as Word, PDF.
|
<!-- **************************** Vitro Fields *************************** -->
|
||||||
Some fields are multiValued only because Tika currently may return
|
|
||||||
multiple values for them.
|
|
||||||
-->
|
|
||||||
<field name="title" type="text" indexed="true" stored="true" multiValued="true"/>
|
<field name="DocId" type="string" indexed="true" stored="true" required="true" />
|
||||||
<field name="subject" type="text" indexed="true" stored="true"/>
|
|
||||||
<field name="description" type="text" indexed="true" stored="true"/>
|
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
||||||
<field name="comments" type="text" indexed="true" stored="true"/>
|
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||||
<field name="author" type="textgen" indexed="true" stored="true"/>
|
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
<field name="keywords" type="textgen" indexed="true" stored="true"/>
|
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
<field name="category" type="textgen" indexed="true" stored="true"/>
|
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
|
||||||
<field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
|
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
||||||
<field name="last_modified" type="date" indexed="true" stored="true"/>
|
<field name="name" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||||
<field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
|
<field name="nameunstemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="nameunanalyzed" type="string" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="nameraw" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
|
||||||
|
<field name="moniker" type="ignored" />
|
||||||
|
<field name="modType" type="ignored"/>
|
||||||
|
<field name="JCLASS" type="ignored"/>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<!-- catchall field, containing all other searchable text fields (implemented
|
<!-- catchall field, containing all other searchable text fields (implemented
|
||||||
|
@ -466,19 +459,7 @@
|
||||||
leading wildcard queries. -->
|
leading wildcard queries. -->
|
||||||
<field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
|
<field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
|
||||||
<!-- non-tokenized version of manufacturer to make it easier to sort or group
|
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
|
||||||
results by manufacturer. copied from "manu" via copyField -->
|
|
||||||
<field name="manu_exact" type="string" indexed="true" stored="false"/>
|
|
||||||
|
|
||||||
<field name="payloads" type="payloads" indexed="true" stored="true"/>
|
|
||||||
|
|
||||||
<!-- Uncommenting the following will create a "timestamp" field using
|
|
||||||
a default value of "NOW" to indicate when each document was indexed.
|
|
||||||
-->
|
|
||||||
<!--
|
|
||||||
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
|
|
||||||
-->
|
|
||||||
|
|
||||||
|
|
||||||
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
||||||
will be used if the name matches any of the patterns.
|
will be used if the name matches any of the patterns.
|
||||||
|
@ -521,10 +502,10 @@
|
||||||
<!-- Field to use to determine and enforce document uniqueness.
|
<!-- Field to use to determine and enforce document uniqueness.
|
||||||
Unless this field is marked with required="false", it will be a required field
|
Unless this field is marked with required="false", it will be a required field
|
||||||
-->
|
-->
|
||||||
<uniqueKey>id</uniqueKey>
|
<uniqueKey>DocId</uniqueKey>
|
||||||
|
|
||||||
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
|
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
|
||||||
<defaultSearchField>text</defaultSearchField>
|
<defaultSearchField>ALLTEXT</defaultSearchField>
|
||||||
|
|
||||||
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
||||||
<solrQueryParser defaultOperator="OR"/>
|
<solrQueryParser defaultOperator="OR"/>
|
||||||
|
@ -533,12 +514,7 @@
|
||||||
is added to the index. It's used either to index the same field differently,
|
is added to the index. It's used either to index the same field differently,
|
||||||
or to add multiple fields to the same field for easier/faster searching. -->
|
or to add multiple fields to the same field for easier/faster searching. -->
|
||||||
|
|
||||||
<copyField source="cat" dest="text"/>
|
<!-- <copyField source="name" dest="text"/> -->
|
||||||
<copyField source="name" dest="text"/>
|
|
||||||
<copyField source="manu" dest="text"/>
|
|
||||||
<copyField source="features" dest="text"/>
|
|
||||||
<copyField source="includes" dest="text"/>
|
|
||||||
<copyField source="manu" dest="manu_exact"/>
|
|
||||||
|
|
||||||
<!-- Above, multiple source fields are copied to the [text] field.
|
<!-- Above, multiple source fields are copied to the [text] field.
|
||||||
Another way to map multiple source fields to the same
|
Another way to map multiple source fields to the same
|
||||||
|
|
|
@ -68,7 +68,7 @@
|
||||||
<!-- Used to specify an alternate directory to hold all index data
|
<!-- Used to specify an alternate directory to hold all index data
|
||||||
other than the default ./data under the Solr home.
|
other than the default ./data under the Solr home.
|
||||||
If replication is in use, this should match the replication configuration. -->
|
If replication is in use, this should match the replication configuration. -->
|
||||||
<dataDir>${solr.data.dir:./solr/data}</dataDir>
|
<!-- <dataDir>${solr.data.dir:./solr/data}</dataDir> -->
|
||||||
|
|
||||||
|
|
||||||
<!-- WARNING: this <indexDefaults> section only provides defaults for index writers
|
<!-- WARNING: this <indexDefaults> section only provides defaults for index writers
|
||||||
|
|
|
@ -52,6 +52,14 @@ VitroConnection.DataSource.url = jdbc:mysql://localhost/vitro
|
||||||
VitroConnection.DataSource.username = vitroweb
|
VitroConnection.DataSource.username = vitroweb
|
||||||
VitroConnection.DataSource.password = vitrovitro
|
VitroConnection.DataSource.password = vitrovitro
|
||||||
|
|
||||||
|
#
|
||||||
|
# The URL to connect to for the Solr service that is used by the application.
|
||||||
|
# The Solr service provides the application with full text search and many
|
||||||
|
# other features. This property must be set: if it is left commented out, Solr
|
||||||
|
# setup logs an error at startup and stops. A typical value is http://localhost:${port}/${webapp.name}solr
|
||||||
|
#
|
||||||
|
# vitro.local.solr.url =
|
||||||
|
|
||||||
#
|
#
|
||||||
# The name of your first admin user for the VIVO application. The password
|
# The name of your first admin user for the VIVO application. The password
|
||||||
# for this user is initially set to "defaultAdmin", but you will be asked to
|
# for this user is initially set to "defaultAdmin", but you will be asked to
|
||||||
|
|
BIN
webapp/lib/apache-solr-solrj-1.4.1.jar
Normal file
BIN
webapp/lib/apache-solr-solrj-1.4.1.jar
Normal file
Binary file not shown.
BIN
webapp/lib/jcl-over-slf4j-1.5.5.jar
Normal file
BIN
webapp/lib/jcl-over-slf4j-1.5.5.jar
Normal file
Binary file not shown.
|
@ -0,0 +1,45 @@
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||||
|
|
||||||
|
import org.apache.solr.common.SolrDocument;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
||||||
|
|
||||||
|
public class IndividualToSolrDocument implements Obj2DocIface {
|
||||||
|
|
||||||
|
protected LuceneDocToSolrDoc luceneToSolr;
|
||||||
|
protected Entity2LuceneDoc entityToLucene;
|
||||||
|
|
||||||
|
public IndividualToSolrDocument(Entity2LuceneDoc e2d){
|
||||||
|
entityToLucene = e2d;
|
||||||
|
luceneToSolr = new LuceneDocToSolrDoc();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean canTranslate(Object obj) {
|
||||||
|
return obj != null && obj instanceof Individual;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean canUnTranslate(Object result) {
|
||||||
|
return result != null && result instanceof SolrDocument;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getIndexId(Object obj) {
|
||||||
|
throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object translate(Object obj) throws IndexingException {
|
||||||
|
return luceneToSolr.translate( entityToLucene.translate( obj ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object unTranslate(Object result) {
|
||||||
|
return luceneToSolr.unTranslate( result );
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,60 @@
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.solr.common.SolrDocument;
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Translate a lucene Document into a SolrDocument.
|
||||||
|
*/
|
||||||
|
public class LuceneDocToSolrDoc implements Obj2DocIface {
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean canTranslate(Object obj) {
|
||||||
|
return obj != null && obj instanceof Document;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean canUnTranslate(Object result) {
|
||||||
|
return result != null && result instanceof SolrDocument;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getIndexId(Object obj) {
|
||||||
|
//"this method isn't useful for solr"
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object translate(Object obj) throws IndexingException {
|
||||||
|
Document luceneDoc = (Document)obj;
|
||||||
|
SolrInputDocument solrDoc = new SolrInputDocument();
|
||||||
|
|
||||||
|
for( Object f : luceneDoc.getFields()){
|
||||||
|
Field field = (Field)f;
|
||||||
|
solrDoc.addField( new String(field.name()), field.stringValue() );
|
||||||
|
}
|
||||||
|
return solrDoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object unTranslate(Object result) {
|
||||||
|
Individual ind = null;
|
||||||
|
if( result != null && result instanceof SolrDocument){
|
||||||
|
SolrDocument hit = (SolrDocument)result;
|
||||||
|
String id = (String) hit.getFieldValue(Entity2LuceneDoc.term.URI);
|
||||||
|
ind = new IndividualImpl();
|
||||||
|
ind.setURI(id);
|
||||||
|
}
|
||||||
|
return ind;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,163 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.solr.client.solrj.SolrServer;
|
||||||
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
|
import org.apache.solr.common.SolrDocument;
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface;
|
||||||
|
|
||||||
|
public class SolrIndexer implements IndexerIface {
|
||||||
|
private final static Log log = LogFactory.getLog(SolrIndexer.class);
|
||||||
|
|
||||||
|
protected SolrServer server;
|
||||||
|
protected boolean indexing;
|
||||||
|
protected List<Obj2DocIface> obj2DocList;
|
||||||
|
protected HashSet<String> urisIndexed;
|
||||||
|
|
||||||
|
public SolrIndexer( SolrServer server, List<Obj2DocIface> o2d){
|
||||||
|
this.server = server;
|
||||||
|
this.obj2DocList = o2d;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void index(Individual ind, boolean newDoc) throws IndexingException {
|
||||||
|
|
||||||
|
if( ! indexing )
|
||||||
|
throw new IndexingException("SolrIndexer: must call " +
|
||||||
|
"startIndexing() before index().");
|
||||||
|
|
||||||
|
if( ind == null )
|
||||||
|
log.debug("Individual to index was null, ignoring.");
|
||||||
|
|
||||||
|
try{
|
||||||
|
if( urisIndexed.contains(ind.getURI()) ){
|
||||||
|
log.debug("already indexed " + ind.getURI() );
|
||||||
|
return;
|
||||||
|
}else{
|
||||||
|
urisIndexed.add(ind.getURI());
|
||||||
|
log.debug("indexing " + ind.getURI());
|
||||||
|
Iterator<Obj2DocIface> it = getObj2DocList().iterator();
|
||||||
|
while (it.hasNext()) {
|
||||||
|
Obj2DocIface obj2doc = (Obj2DocIface) it.next();
|
||||||
|
if (obj2doc.canTranslate(ind)) {
|
||||||
|
SolrInputDocument solrDoc = (SolrInputDocument) obj2doc.translate(ind);
|
||||||
|
if( solrDoc != null){
|
||||||
|
//sending each doc individually is inefficient
|
||||||
|
Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
|
||||||
|
docs.add( solrDoc );
|
||||||
|
server.add( docs );
|
||||||
|
// if( !newDoc ){
|
||||||
|
// server.add( docs );
|
||||||
|
// log.debug("updated " + ind.getName() + " " + ind.getURI());
|
||||||
|
// }else{
|
||||||
|
// server.add( docs );
|
||||||
|
// log.debug("added " + ind.getName() + " " + ind.getURI());
|
||||||
|
// }
|
||||||
|
}else{
|
||||||
|
log.debug("removing from index " + ind.getURI());
|
||||||
|
//writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException ex) {
|
||||||
|
throw new IndexingException(ex.getMessage());
|
||||||
|
} catch (SolrServerException ex) {
|
||||||
|
throw new IndexingException(ex.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isIndexing() {
|
||||||
|
return indexing;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void prepareForRebuild() throws IndexingException {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void removeFromIndex(Individual ind) throws IndexingException {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void startIndexing() throws IndexingException {
|
||||||
|
while( indexing ){ //wait for indexing to end.
|
||||||
|
log.debug("LuceneIndexer.startIndexing() waiting...");
|
||||||
|
try{ wait(); } catch(InterruptedException ex){}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.debug("Starting to index");
|
||||||
|
indexing = true;
|
||||||
|
urisIndexed = new HashSet<String>();
|
||||||
|
notifyAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public synchronized void addObj2Doc(Obj2DocIface o2d) {
|
||||||
|
if (o2d != null)
|
||||||
|
obj2DocList.add(o2d);
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized List<Obj2DocIface> getObj2DocList() {
|
||||||
|
return obj2DocList;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void abortIndexingAndCleanUp() {
|
||||||
|
endIndexing();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void endIndexing() {
|
||||||
|
try {
|
||||||
|
server.commit();
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Could not commit to solr server", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
server.optimize();
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Could not optimize solr server", e);
|
||||||
|
}
|
||||||
|
indexing = false;
|
||||||
|
notifyAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getModified() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isIndexEmpty() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,132 @@
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import javax.servlet.ServletContext;
|
||||||
|
import javax.servlet.ServletContextEvent;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.solr.client.solrj.SolrServer;
|
||||||
|
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
|
||||||
|
|
||||||
|
import com.hp.hpl.jena.ontology.OntModel;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.BaseResourceBean.RoleLevel;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
|
||||||
|
|
||||||
|
public class SolrSetup implements javax.servlet.ServletContextListener{
|
||||||
|
private static final Log log = LogFactory.getLog(SolrSetup.class.getName());
|
||||||
|
|
||||||
|
protected static final String LOCAL_SOLR_SERVER = "vitro.local.solr.server";
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void contextInitialized(ServletContextEvent sce) {
|
||||||
|
if (AbortStartup.isStartupAborted(sce.getServletContext())) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
ServletContext context = sce.getServletContext();
|
||||||
|
|
||||||
|
/* setup the http connection with the solr server */
|
||||||
|
String solrServerUrl = ConfigurationProperties.getBean(sce).getProperty("vitro.local.solr.url");
|
||||||
|
if( solrServerUrl == null ){
|
||||||
|
log.error("Could not find vitro.local.solr.url in deploy.properties. "+
|
||||||
|
"Vitro application needs a URL of a solr server that it can use to index its data. " +
|
||||||
|
"The it should be something like http://localhost:${port}" + context.getContextPath() + "solr"
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
CommonsHttpSolrServer server;
|
||||||
|
server = new CommonsHttpSolrServer( solrServerUrl );
|
||||||
|
server.setSoTimeout(1000); // socket read timeout
|
||||||
|
server.setConnectionTimeout(100);
|
||||||
|
server.setDefaultMaxConnectionsPerHost(100);
|
||||||
|
server.setMaxTotalConnections(100);
|
||||||
|
server.setMaxRetries(1);
|
||||||
|
context.setAttribute(LOCAL_SOLR_SERVER, server);
|
||||||
|
|
||||||
|
/* setup the individual to solr doc translation */
|
||||||
|
//first we need a ent2luceneDoc translator
|
||||||
|
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
||||||
|
Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc(
|
||||||
|
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||||
|
new IndividualProhibitedFromSearch(context) );
|
||||||
|
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc );
|
||||||
|
List<Obj2DocIface> o2d = new ArrayList<Obj2DocIface>();
|
||||||
|
o2d.add(indToSolrDoc);
|
||||||
|
|
||||||
|
/* setup solr indexer */
|
||||||
|
SolrIndexer solrIndexer = new SolrIndexer(server, o2d);
|
||||||
|
if( solrIndexer.isIndexEmpty() ){
|
||||||
|
log.info("solr index is empty, requesting rebuild");
|
||||||
|
sce.getServletContext().setAttribute(LuceneSetup.INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is where the builder gets the list of places to try to
|
||||||
|
// get objects to index. It is filtered so that non-public text
|
||||||
|
// does not get into the search index.
|
||||||
|
WebappDaoFactory wadf = (WebappDaoFactory) context.getAttribute("webappDaoFactory");
|
||||||
|
VitroFilters vf = VitroFilterUtils.getDisplayFilterByRoleLevel(RoleLevel.PUBLIC, wadf);
|
||||||
|
wadf = new WebappDaoFactoryFiltering(wadf, vf);
|
||||||
|
List<ObjectSourceIface> sources = new ArrayList<ObjectSourceIface>();
|
||||||
|
sources.add(wadf.getIndividualDao());
|
||||||
|
|
||||||
|
IndexBuilder builder = new IndexBuilder(context, solrIndexer, sources);
|
||||||
|
// to the servlet context so we can access it later in the webapp.
|
||||||
|
context.setAttribute(IndexBuilder.class.getName(), builder);
|
||||||
|
|
||||||
|
// set up listeners so search index builder is notified of changes to model
|
||||||
|
ServletContext ctx = sce.getServletContext();
|
||||||
|
SearchReindexingListener srl = new SearchReindexingListener(builder);
|
||||||
|
ModelContext.registerListenerForChanges(ctx, srl);
|
||||||
|
|
||||||
|
if( sce.getServletContext().getAttribute(LuceneSetup.INDEX_REBUILD_REQUESTED_AT_STARTUP) instanceof Boolean &&
|
||||||
|
(Boolean)sce.getServletContext().getAttribute(LuceneSetup.INDEX_REBUILD_REQUESTED_AT_STARTUP) ){
|
||||||
|
log.info("Rebuild of solr index required before startup.");
|
||||||
|
builder.doIndexRebuild();
|
||||||
|
int n = 0;
|
||||||
|
while( builder.isReindexRequested() || builder.isIndexing() ){
|
||||||
|
n++;
|
||||||
|
if( n % 20 == 0 ) //output message every 10 sec.
|
||||||
|
log.info("Still rebuilding solr index");
|
||||||
|
Thread.sleep(500);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Setup of Solr index completed.");
|
||||||
|
} catch (Throwable e) {
|
||||||
|
log.error("could not setup local solr server",e);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void contextDestroyed(ServletContextEvent sce) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static SolrServer getSolrServer(ServletContext ctx){
|
||||||
|
return (SolrServer) ctx.getAttribute(LOCAL_SOLR_SERVER);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Add table
Reference in a new issue