VIVO-870 Implement UpdateUrisTask.
This change also breaks the old IndexBuilder, so its listener and the index rebuild no longer work.
This commit is contained in:
parent
64624f2b84
commit
39f1ed0b27
11 changed files with 828 additions and 581 deletions
|
@ -110,23 +110,15 @@ public interface SearchIndexer extends Application.Module {
|
||||||
*/
|
*/
|
||||||
public static class Event {
|
public static class Event {
|
||||||
public enum Type {
|
public enum Type {
|
||||||
STARTUP,
|
STARTUP, PROGRESS,
|
||||||
|
|
||||||
START_PROCESSING_URIS,
|
START_PROCESSING_URIS, STOP_PROCESSING_URIS,
|
||||||
|
|
||||||
PROGRESS_PROCESSING_URIS,
|
START_PROCESSING_STATEMENTS, STOP_PROCESSING_STATEMENTS,
|
||||||
|
|
||||||
STOP_PROCESSING_URIS,
|
REBUILD_REQUESTED, REBUILD_COMPLETE,
|
||||||
|
|
||||||
START_PROCESSING_STATEMENTS,
|
SHUTDOWN_REQUESTED, SHUTDOWN_COMPLETE
|
||||||
|
|
||||||
PROGRESS_PROCESSING_STATEMENTS,
|
|
||||||
|
|
||||||
STOP_PROCESSING_STATEMENTS,
|
|
||||||
|
|
||||||
REBUILD_REQUESTED,
|
|
||||||
|
|
||||||
SHUTDOWN_REQUESTED
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private final Type type;
|
private final Type type;
|
||||||
|
|
|
@ -5,23 +5,34 @@ package edu.cornell.mannlib.vitro.webapp.modules.searchIndexer;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An immutable summary of the status of the SearchIndexer, at some point in
|
* An immutable summary of the status of the SearchIndexer, at a fixed point in
|
||||||
* time.Contains the current state, and some counts.
|
* time. Contains the current state, the time, and some counts.
|
||||||
*
|
*
|
||||||
* If the indexer is processing URIs, processing statements, or preparing a
|
* If the indexer is processing URIs, processing statements, or preparing a
|
||||||
* rebuild, the counts are URI_COUNTS, STATEMENT_COUNTS, or REBUILD_COUNTS.
|
* rebuild, the counts are URI_COUNTS, STATEMENT_COUNTS, or REBUILD_COUNTS,
|
||||||
|
* respectively.
|
||||||
*
|
*
|
||||||
* When the indexer starts up, and when it is is shut down, the counts are
|
* When the indexer starts up, becomes idle, or shuts down, the counts are
|
||||||
* NO_COUNTS.
|
* NO_COUNTS.
|
||||||
*
|
|
||||||
* If the indexer is idle, the counts are carried over from the previous
|
|
||||||
* operation.
|
|
||||||
*/
|
*/
|
||||||
public class SearchIndexerStatus {
|
public class SearchIndexerStatus {
|
||||||
public enum State {
|
// ----------------------------------------------------------------------
|
||||||
IDLE, PROCESSING_URIS, PROCESSING_STMTS, PREPARING_REBUILD, SHUTDOWN
|
// factory methods
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
public static SearchIndexerStatus idle() {
|
||||||
|
return new SearchIndexerStatus(State.IDLE, new Date(), new NoCounts());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static SearchIndexerStatus shutdown() {
|
||||||
|
return new SearchIndexerStatus(State.SHUTDOWN, new Date(),
|
||||||
|
new NoCounts());
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// the instance
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
private final State state;
|
private final State state;
|
||||||
private final Date since;
|
private final Date since;
|
||||||
private final Counts counts;
|
private final Counts counts;
|
||||||
|
@ -44,6 +55,14 @@ public class SearchIndexerStatus {
|
||||||
return counts;
|
return counts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// helper classes
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
public enum State {
|
||||||
|
IDLE, PROCESSING_URIS, PROCESSING_STMTS, PREPARING_REBUILD, SHUTDOWN
|
||||||
|
}
|
||||||
|
|
||||||
public abstract static class Counts {
|
public abstract static class Counts {
|
||||||
public enum Type {
|
public enum Type {
|
||||||
URI_COUNTS, STATEMENT_COUNTS, REBUILD_COUNTS, NO_COUNTS
|
URI_COUNTS, STATEMENT_COUNTS, REBUILD_COUNTS, NO_COUNTS
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.modules.searchIndexer;
|
||||||
|
|
||||||
|
/**
 * Static helper methods for dealing with the search index.
 *
 * The class holds no state and exposes only static methods, so it is final and
 * cannot be instantiated.
 */
public final class SearchIndexerUtils {

	/** Not instantiable: this class offers static methods only. */
	private SearchIndexerUtils() {
		// no instances
	}

	/**
	 * Derives the search-index document ID from an individual's URI.
	 *
	 * @param uri
	 *            the URI of the individual; may be null
	 * @return the literal prefix "vitroIndividual:" followed by the URI, or
	 *         null if the URI is null
	 */
	public static String getIdForUri(String uri) {
		// A null URI has no document ID; pass the null through to the caller.
		if (uri == null) {
			return null;
		}
		return "vitroIndividual:" + uri;
	}
}
|
|
@ -18,7 +18,6 @@ import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
|
||||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocumentList;
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocumentList;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndexerIface;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.IndexerIface;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.IndividualToSearchDocument;
|
|
||||||
|
|
||||||
|
|
||||||
public class SearchIndexer implements IndexerIface {
|
public class SearchIndexer implements IndexerIface {
|
||||||
|
@ -27,7 +26,6 @@ public class SearchIndexer implements IndexerIface {
|
||||||
protected SearchEngine server;
|
protected SearchEngine server;
|
||||||
protected boolean indexing;
|
protected boolean indexing;
|
||||||
protected HashSet<String> urisIndexed;
|
protected HashSet<String> urisIndexed;
|
||||||
protected IndividualToSearchDocument individualToSearchDoc;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* System is shutting down if true.
|
* System is shutting down if true.
|
||||||
|
@ -48,9 +46,8 @@ public class SearchIndexer implements IndexerIface {
|
||||||
*/
|
*/
|
||||||
protected boolean doingFullIndexRebuild = false;
|
protected boolean doingFullIndexRebuild = false;
|
||||||
|
|
||||||
public SearchIndexer( SearchEngine server, IndividualToSearchDocument indToDoc){
|
public SearchIndexer( SearchEngine server){
|
||||||
this.server = server;
|
this.server = server;
|
||||||
this.individualToSearchDoc = indToDoc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -74,7 +71,7 @@ public class SearchIndexer implements IndexerIface {
|
||||||
urisIndexed.add(ind.getURI());
|
urisIndexed.add(ind.getURI());
|
||||||
}
|
}
|
||||||
log.debug("indexing " + ind.getURI());
|
log.debug("indexing " + ind.getURI());
|
||||||
doc = individualToSearchDoc.translate(ind);
|
// doc = individualToSearchDoc.translate(ind);
|
||||||
|
|
||||||
if( doc != null){
|
if( doc != null){
|
||||||
if( log.isDebugEnabled()){
|
if( log.isDebugEnabled()){
|
||||||
|
@ -109,9 +106,9 @@ public class SearchIndexer implements IndexerIface {
|
||||||
public void removeFromIndex(String uri) throws IndexingException {
|
public void removeFromIndex(String uri) throws IndexingException {
|
||||||
if( uri != null ){
|
if( uri != null ){
|
||||||
try {
|
try {
|
||||||
server.deleteById(individualToSearchDoc.getIdForUri(uri));
|
// server.deleteById(individualToSearchDoc.getIdForUri(uri));
|
||||||
log.debug("deleted " + " " + uri);
|
log.debug("deleted " + " " + uri);
|
||||||
} catch (SearchEngineException e) {
|
} catch (Exception e) {
|
||||||
log.error( "could not delete individual " + uri, e);
|
log.error( "could not delete individual " + uri, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -136,7 +133,7 @@ public class SearchIndexer implements IndexerIface {
|
||||||
public void abortIndexingAndCleanUp() {
|
public void abortIndexingAndCleanUp() {
|
||||||
shutdownRequested = true;
|
shutdownRequested = true;
|
||||||
try{
|
try{
|
||||||
individualToSearchDoc.shutdown();
|
// individualToSearchDoc.shutdown();
|
||||||
}catch(Exception e){
|
}catch(Exception e){
|
||||||
if( log != null)
|
if( log != null)
|
||||||
log.debug(e,e);
|
log.debug(e,e);
|
||||||
|
|
|
@ -1,356 +0,0 @@
|
||||||
|
|
||||||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
|
||||||
|
|
||||||
package edu.cornell.mannlib.vitro.webapp.search.documentBuilding;
|
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.ALLTEXT;
|
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.ALLTEXTUNSTEMMED;
|
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.CLASSGROUP_URI;
|
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.DOCID;
|
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.INDEXEDTIME;
|
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.MOST_SPECIFIC_TYPE_URIS;
|
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_LOWERCASE_SINGLE_VALUED;
|
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW;
|
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.RDFTYPE;
|
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.URI;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Map.Entry;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.joda.time.DateTime;
|
|
||||||
import org.jsoup.Jsoup;
|
|
||||||
|
|
||||||
import com.hp.hpl.jena.shared.JenaException;
|
|
||||||
import com.hp.hpl.jena.vocabulary.OWL;
|
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.application.ApplicationUtils;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocument;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder;
|
|
||||||
|
|
||||||
public class IndividualToSearchDocument {
|
|
||||||
|
|
||||||
public static final Log log = LogFactory.getLog(IndividualToSearchDocument.class.getName());
|
|
||||||
|
|
||||||
protected final String label = "http://www.w3.org/2000/01/rdf-schema#label";
|
|
||||||
|
|
||||||
protected List<DocumentModifier> documentModifiers = new ArrayList<DocumentModifier>();
|
|
||||||
|
|
||||||
protected List<SearchIndexExcluder> excludes;
|
|
||||||
|
|
||||||
public IndividualToSearchDocument(List<SearchIndexExcluder> excludes, List<DocumentModifier> docModifiers){
|
|
||||||
this.excludes = excludes;
|
|
||||||
this.documentModifiers = docModifiers;
|
|
||||||
}
|
|
||||||
|
|
||||||
public SearchInputDocument translate(Individual ind) throws IndexingException{
|
|
||||||
try{
|
|
||||||
String excludeMsg = checkExcludes( ind );
|
|
||||||
if( excludeMsg != DONT_EXCLUDE){
|
|
||||||
log.debug(ind.getURI() + " " + excludeMsg);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
SearchInputDocument doc = ApplicationUtils.instance().getSearchEngine().createInputDocument();
|
|
||||||
|
|
||||||
//DocID
|
|
||||||
doc.addField(DOCID, getIdForUri( ind.getURI() ) );
|
|
||||||
|
|
||||||
//vitro id
|
|
||||||
doc.addField(URI, ind.getURI());
|
|
||||||
log.debug(ind.getURI() + " init boost: " + doc.getDocumentBoost());
|
|
||||||
|
|
||||||
//get label from ind
|
|
||||||
addLabel(ind, doc);
|
|
||||||
|
|
||||||
//add classes, classgroups get if prohibited because of its class
|
|
||||||
StringBuffer classPublicNames = new StringBuffer("");
|
|
||||||
addClasses(ind, doc, classPublicNames);
|
|
||||||
addMostSpecificTypeUris( ind, doc );
|
|
||||||
|
|
||||||
log.debug(ind.getURI() + " post class boost: " + doc.getDocumentBoost());
|
|
||||||
|
|
||||||
// collecting URIs and rdfs:labels of objects of statements
|
|
||||||
StringBuffer objectNames = new StringBuffer("");
|
|
||||||
StringBuffer addUri = new StringBuffer("");
|
|
||||||
addObjectPropertyText(ind, doc, objectNames, addUri);
|
|
||||||
|
|
||||||
//time of index in msec past epoch
|
|
||||||
doc.addField(INDEXEDTIME, (Object) new DateTime().getMillis() );
|
|
||||||
|
|
||||||
addAllText( ind, doc, classPublicNames, objectNames );
|
|
||||||
|
|
||||||
//boost for entity
|
|
||||||
if(ind.getSearchBoost() != null && ind.getSearchBoost() != 0) {
|
|
||||||
doc.setDocumentBoost(ind.getSearchBoost());
|
|
||||||
}
|
|
||||||
|
|
||||||
log.debug(ind.getURI() + " pre mod boost: " + doc.getDocumentBoost());
|
|
||||||
|
|
||||||
runAdditionalDocModifers(ind,doc);
|
|
||||||
|
|
||||||
log.debug(ind.getURI() + " post mod boost: " + doc.getDocumentBoost());
|
|
||||||
|
|
||||||
return doc;
|
|
||||||
}catch(SkipIndividualException ex){
|
|
||||||
//indicates that this individual should not be indexed by returning null
|
|
||||||
log.debug(ex);
|
|
||||||
return null;
|
|
||||||
}catch(Exception th){
|
|
||||||
log.error(th,th);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
protected String checkExcludes(Individual ind) {
|
|
||||||
for( SearchIndexExcluder excluder : excludes){
|
|
||||||
try{
|
|
||||||
String msg = excluder.checkForExclusion(ind);
|
|
||||||
log.debug("individual=" + ind.getURI() + " (" + ind.getLabel()
|
|
||||||
+ "), excluder=" + excluder + ", types="
|
|
||||||
+ ind.getMostSpecificTypeURIs() + ", msg=" + msg);
|
|
||||||
if( msg != DONT_EXCLUDE)
|
|
||||||
return msg;
|
|
||||||
}catch (Exception e) {
|
|
||||||
return e.getMessage();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return DONT_EXCLUDE;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected Map<String,Long> docModClassToTime = new HashMap<String,Long>();
|
|
||||||
protected long docModCount =0;
|
|
||||||
|
|
||||||
protected void runAdditionalDocModifers( Individual ind, SearchInputDocument doc )
|
|
||||||
throws SkipIndividualException{
|
|
||||||
//run the document modifiers
|
|
||||||
if( documentModifiers != null && !documentModifiers.isEmpty()){
|
|
||||||
docModCount++;
|
|
||||||
for(DocumentModifier modifier: documentModifiers){
|
|
||||||
|
|
||||||
long start = System.currentTimeMillis();
|
|
||||||
|
|
||||||
modifier.modifyDocument(ind, doc);
|
|
||||||
|
|
||||||
if( log.isDebugEnabled()){
|
|
||||||
long delta = System.currentTimeMillis() - start;
|
|
||||||
synchronized(docModClassToTime){
|
|
||||||
Class<?> clz = modifier.getClass();
|
|
||||||
if( docModClassToTime.containsKey( clz.getName() )){
|
|
||||||
Long time = docModClassToTime.get(clz.getName() );
|
|
||||||
docModClassToTime.put(clz.getName(), time + delta);
|
|
||||||
}else{
|
|
||||||
docModClassToTime.put(clz.getName(), delta);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if( docModCount % 200 == 0 ){
|
|
||||||
log.debug("DocumentModifier timings");
|
|
||||||
for( Entry<String, Long> entry: docModClassToTime.entrySet()){
|
|
||||||
log.debug("average msec to run " + entry.getKey() + ": " + (entry.getValue()/docModCount));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void addAllText(Individual ind, SearchInputDocument doc, StringBuffer classPublicNames, StringBuffer objectNames) {
|
|
||||||
String t=null;
|
|
||||||
//ALLTEXT, all of the 'full text'
|
|
||||||
StringBuffer allTextValue = new StringBuffer();
|
|
||||||
|
|
||||||
try{
|
|
||||||
//collecting data property statements
|
|
||||||
List<DataPropertyStatement> dataPropertyStatements = ind.getDataPropertyStatements();
|
|
||||||
if (dataPropertyStatements != null) {
|
|
||||||
Iterator<DataPropertyStatement> dataPropertyStmtIter = dataPropertyStatements.iterator();
|
|
||||||
while (dataPropertyStmtIter.hasNext()) {
|
|
||||||
DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next();
|
|
||||||
if(dataPropertyStmt.getDatapropURI().equals(label)){ // we don't want label to be added to alltext
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
allTextValue.append(" ");
|
|
||||||
allTextValue.append(((t=dataPropertyStmt.getData()) == null)?"":t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}catch(JenaException je){
|
|
||||||
//VIVO-15 Trap for characters that cause search indexing to abort
|
|
||||||
log.error(String.format("Continuing to index %s but could not get all dataproperties because %s",ind.getURI(),je.getMessage()));
|
|
||||||
}
|
|
||||||
|
|
||||||
allTextValue.append(objectNames.toString());
|
|
||||||
|
|
||||||
allTextValue.append(' ');
|
|
||||||
allTextValue.append(classPublicNames);
|
|
||||||
|
|
||||||
try {
|
|
||||||
String stripped = Jsoup.parse(allTextValue.toString()).text();
|
|
||||||
allTextValue.setLength(0);
|
|
||||||
allTextValue.append(stripped);
|
|
||||||
} catch(Exception e) {
|
|
||||||
log.debug("Could not strip HTML during search indexing. " + e);
|
|
||||||
}
|
|
||||||
|
|
||||||
String alltext = allTextValue.toString();
|
|
||||||
|
|
||||||
doc.addField(ALLTEXT, alltext);
|
|
||||||
doc.addField(ALLTEXTUNSTEMMED, alltext);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the rdfs:labes for objects of statements and put in objectNames.
|
|
||||||
* Get the URIs for objects of statements and put in addUri.
|
|
||||||
*/
|
|
||||||
protected void addObjectPropertyText(Individual ind, SearchInputDocument doc,
|
|
||||||
StringBuffer objectNames, StringBuffer addUri) {
|
|
||||||
|
|
||||||
try{
|
|
||||||
List<ObjectPropertyStatement> objectPropertyStatements = ind.getObjectPropertyStatements();
|
|
||||||
if (objectPropertyStatements != null) {
|
|
||||||
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
|
|
||||||
while (objectPropertyStmtIter.hasNext()) {
|
|
||||||
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
|
|
||||||
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) ){
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
objectNames.append(" ");
|
|
||||||
String t=null;
|
|
||||||
objectNames.append(((t=objectPropertyStmt.getObject().getRdfsLabel()) == null)?"":t);
|
|
||||||
addUri.append(" ");
|
|
||||||
addUri.append(((t=objectPropertyStmt.getObject().getURI()) == null)?"":t);
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.debug("could not index name of related object: " + e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}catch(JenaException je){
|
|
||||||
//VIVO-15 Trap for characters that cause search indexing to abort
|
|
||||||
log.error(String.format("Continuing to index %s but could not get all object properties because %s",ind.getURI(),je.getMessage()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds the info about the classes that the individual is a member
|
|
||||||
* of, classgroups and checks if prohibited.
|
|
||||||
* @param classPublicNames
|
|
||||||
* @returns true if prohibited from search
|
|
||||||
* @throws SkipIndividualException
|
|
||||||
*/
|
|
||||||
protected void addClasses(Individual ind, SearchInputDocument doc, StringBuffer classPublicNames) throws SkipIndividualException{
|
|
||||||
List<VClass> vclasses = ind.getVClasses(false);
|
|
||||||
if( vclasses == null || vclasses.isEmpty() ){
|
|
||||||
throw new SkipIndividualException("Not indexing because individual has no classes");
|
|
||||||
}
|
|
||||||
|
|
||||||
for(VClass clz : vclasses){
|
|
||||||
if(clz.getURI() == null){
|
|
||||||
continue;
|
|
||||||
}else if(OWL.Thing.getURI().equals(clz.getURI())){
|
|
||||||
//don't add owl:Thing as the type in the index
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
if( clz.getSearchBoost() != null){
|
|
||||||
doc.setDocumentBoost(doc.getDocumentBoost() + clz.getSearchBoost());
|
|
||||||
}
|
|
||||||
|
|
||||||
doc.addField(RDFTYPE, clz.getURI());
|
|
||||||
|
|
||||||
if(clz.getName() != null){
|
|
||||||
classPublicNames.append(" ");
|
|
||||||
classPublicNames.append(clz.getName());
|
|
||||||
}
|
|
||||||
|
|
||||||
//Add the Classgroup URI to a field
|
|
||||||
if(clz.getGroupURI() != null){
|
|
||||||
doc.addField(CLASSGROUP_URI,clz.getGroupURI());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void addMostSpecificTypeUris(Individual ind, SearchInputDocument doc){
|
|
||||||
List<String> mstURIs = ind.getMostSpecificTypeURIs();
|
|
||||||
if( mstURIs != null ){
|
|
||||||
for( String typeURI : mstURIs ){
|
|
||||||
if( typeURI != null && ! typeURI.trim().isEmpty() )
|
|
||||||
doc.addField(MOST_SPECIFIC_TYPE_URIS, typeURI);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void addLabel(Individual ind, SearchInputDocument doc) {
|
|
||||||
String value = "";
|
|
||||||
String label = ind.getRdfsLabel();
|
|
||||||
if (label != null) {
|
|
||||||
value = label;
|
|
||||||
} else {
|
|
||||||
value = ind.getLocalName();
|
|
||||||
}
|
|
||||||
|
|
||||||
doc.addField(NAME_RAW, value);
|
|
||||||
doc.addField(NAME_LOWERCASE_SINGLE_VALUED,value);
|
|
||||||
|
|
||||||
// NAME_RAW will be copied by the search engine into the following fields:
|
|
||||||
// NAME_LOWERCASE, NAME_UNSTEMMED, NAME_STEMMED, NAME_PHONETIC, AC_NAME_UNTOKENIZED, AC_NAME_STEMMED
|
|
||||||
}
|
|
||||||
|
|
||||||
public Object getIndexId(Object obj) {
|
|
||||||
throw new Error("IndiviudalToSearchDocument.getIndexId() is unimplemented");
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getIdForUri(String uri){
|
|
||||||
if( uri != null ){
|
|
||||||
return "vitroIndividual:" + uri;
|
|
||||||
}else{
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getQueryForId(String uri ){
|
|
||||||
return DOCID + ':' + getIdForUri(uri);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Individual unTranslate(Object result) {
|
|
||||||
Individual ent = null;
|
|
||||||
|
|
||||||
if( result instanceof SearchResultDocument){
|
|
||||||
SearchResultDocument hit = (SearchResultDocument) result;
|
|
||||||
String uri= (String) hit.getFirstValue(URI);
|
|
||||||
|
|
||||||
ent = new IndividualImpl();
|
|
||||||
ent.setURI(uri);
|
|
||||||
}
|
|
||||||
return ent;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void shutdown(){
|
|
||||||
for(DocumentModifier dm: documentModifiers){
|
|
||||||
try{
|
|
||||||
dm.shutdown();
|
|
||||||
}catch(Exception e){
|
|
||||||
if( log != null)
|
|
||||||
log.debug(e,e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static final String DONT_EXCLUDE =null;
|
|
||||||
}
|
|
|
@ -3,79 +3,90 @@
|
||||||
package edu.cornell.mannlib.vitro.webapp.searchindex;
|
package edu.cornell.mannlib.vitro.webapp.searchindex;
|
||||||
|
|
||||||
import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames.DISPLAY;
|
import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames.DISPLAY;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.REBUILD_REQUESTED;
|
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.SHUTDOWN_COMPLETE;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.*;
|
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.SHUTDOWN_REQUESTED;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.STOP_PROCESSING_STATEMENTS;
|
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.STARTUP;
|
||||||
|
import static java.util.concurrent.TimeUnit.MINUTES;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Date;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.ArrayBlockingQueue;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.RejectedExecutionException;
|
||||||
|
import java.util.concurrent.ThreadPoolExecutor;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
|
||||||
import javax.servlet.ServletContext;
|
import javax.servlet.ServletContext;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
||||||
import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess;
|
import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess;
|
||||||
import edu.cornell.mannlib.vitro.webapp.modules.Application;
|
import edu.cornell.mannlib.vitro.webapp.modules.Application;
|
||||||
import edu.cornell.mannlib.vitro.webapp.modules.ComponentStartupStatus;
|
import edu.cornell.mannlib.vitro.webapp.modules.ComponentStartupStatus;
|
||||||
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer;
|
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer;
|
||||||
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus;
|
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus;
|
||||||
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.State;
|
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.State;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexingEventListener;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier;
|
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier;
|
||||||
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder;
|
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder;
|
||||||
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinder;
|
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinder;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.RebuildIndexTask;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateUrisTask;
|
||||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoader;
|
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoader;
|
||||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoaderException;
|
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoaderException;
|
||||||
import edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread.WorkLevel;
|
import edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread;
|
||||||
import edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread.WorkLevelStamp;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TODO A silly implementation that just wraps the old IndexBuilder.
|
* An implementation of the SearchIndexer interface.
|
||||||
|
*
|
||||||
|
* This implementation uses a single-threaded task queue so indexing tasks can
|
||||||
|
* run one at a time in a "background" thread. The task queue is controlled by a
|
||||||
|
* scheduler that allows us to suspend incoming tasks (pause).
|
||||||
|
*
|
||||||
|
* A thread pool is available so the tasks can create small units of work to be
|
||||||
|
* run in parallel. Each task should block until all of its work units are
|
||||||
|
* complete, to preserve the pattern of running one task at a time.
|
||||||
*/
|
*/
|
||||||
public class SearchIndexerImpl implements SearchIndexer {
|
public class SearchIndexerImpl implements SearchIndexer {
|
||||||
private static final Log log = LogFactory.getLog(SearchIndexerImpl.class);
|
private static final Log log = LogFactory.getLog(SearchIndexerImpl.class);
|
||||||
|
|
||||||
private final ListenerList listeners = new ListenerList();
|
private final ListenerList listeners = new ListenerList();
|
||||||
|
private final TaskQueue taskQueue = new TaskQueue();
|
||||||
|
private final Scheduler scheduler = new Scheduler(taskQueue);
|
||||||
|
private final WorkerThreadPool pool = new WorkerThreadPool();
|
||||||
|
|
||||||
private ServletContext ctx;
|
private ServletContext ctx;
|
||||||
private Set<SearchIndexExcluder> excluders;
|
private Set<SearchIndexExcluder> excluders;
|
||||||
private Set<DocumentModifier> modifiers;
|
private Set<DocumentModifier> modifiers;
|
||||||
private Set<IndexingUriFinder> uriFinders;
|
private Set<IndexingUriFinder> uriFinders;
|
||||||
|
private WebappDaoFactory wadf;
|
||||||
// TODO
|
|
||||||
private IndexBuilder indexBuilder;
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startup(Application application, ComponentStartupStatus ss) {
|
public void startup(Application application, ComponentStartupStatus ss) {
|
||||||
try {
|
try {
|
||||||
this.ctx = application.getServletContext();
|
this.ctx = application.getServletContext();
|
||||||
|
|
||||||
loadConfiguration();
|
loadConfiguration();
|
||||||
|
|
||||||
|
this.wadf = getFilteredWebappDaoFactory();
|
||||||
|
|
||||||
|
listeners.fireEvent(new Event(STARTUP, getStatus()));
|
||||||
ss.info("Configured SearchIndexer: excluders=" + excluders
|
ss.info("Configured SearchIndexer: excluders=" + excluders
|
||||||
+ ", modifiers=" + modifiers + ", uriFinders=" + uriFinders);
|
+ ", modifiers=" + modifiers + ", uriFinders=" + uriFinders);
|
||||||
|
|
||||||
{ // >>>>>>> TODO
|
|
||||||
this.indexBuilder = (IndexBuilder) ctx
|
|
||||||
.getAttribute(IndexBuilder.class.getName());
|
|
||||||
|
|
||||||
this.indexBuilder.addIndexBuilderListener(new BridgeListener());
|
|
||||||
}
|
|
||||||
|
|
||||||
createAndFire(STARTUP);
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
ss.fatal("Failed to configure the SearchIndexer", e);
|
ss.fatal("Failed to configure the SearchIndexer", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void createAndFire(Event.Type type) {
|
|
||||||
listeners.fireEvent(new Event(type, getStatus()));
|
|
||||||
}
|
|
||||||
|
|
||||||
private void loadConfiguration() throws ConfigurationBeanLoaderException {
|
private void loadConfiguration() throws ConfigurationBeanLoaderException {
|
||||||
ConfigurationBeanLoader beanLoader = new ConfigurationBeanLoader(
|
ConfigurationBeanLoader beanLoader = new ConfigurationBeanLoader(
|
||||||
ModelAccess.on(ctx).getOntModel(DISPLAY), ctx);
|
ModelAccess.on(ctx).getOntModel(DISPLAY), ctx);
|
||||||
|
@ -84,80 +95,36 @@ public class SearchIndexerImpl implements SearchIndexer {
|
||||||
uriFinders = beanLoader.loadAll(IndexingUriFinder.class);
|
uriFinders = beanLoader.loadAll(IndexingUriFinder.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/**
|
||||||
* (non-Javadoc)
|
* Use a filtered DAO factory, so only public data goes into the search
|
||||||
*
|
* index.
|
||||||
* @see
|
|
||||||
* edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer#
|
|
||||||
* scheduleUpdatesForUris(java.util.Collection)
|
|
||||||
*/
|
*/
|
||||||
|
private WebappDaoFactory getFilteredWebappDaoFactory() {
|
||||||
|
WebappDaoFactory rawWadf = ModelAccess.on(ctx).getWebappDaoFactory();
|
||||||
|
VitroFilters vf = VitroFilterUtils.getPublicFilter(ctx);
|
||||||
|
return new WebappDaoFactoryFiltering(rawWadf, vf);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void scheduleUpdatesForUris(Collection<String> uris) {
|
public void scheduleUpdatesForUris(Collection<String> uris) {
|
||||||
// TODO
|
log.debug("Schedule updates for " + uris.size() + " uris.");
|
||||||
for (String uri : uris) {
|
scheduler.scheduleTask(new UpdateUrisTask(uris, excluders, modifiers,
|
||||||
indexBuilder.addToChanged(uri);
|
wadf.getIndividualDao(), listeners, pool));
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* (non-Javadoc)
|
|
||||||
*
|
|
||||||
* @see
|
|
||||||
* edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer#
|
|
||||||
* rebuildIndex()
|
|
||||||
*/
|
|
||||||
@Override
|
@Override
|
||||||
public void rebuildIndex() {
|
public void rebuildIndex() {
|
||||||
// TODO
|
scheduler.scheduleTask(new RebuildIndexTask());
|
||||||
indexBuilder.doIndexRebuild();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* (non-Javadoc)
|
|
||||||
*
|
|
||||||
* @see
|
|
||||||
* edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer#
|
|
||||||
* pause()
|
|
||||||
*/
|
|
||||||
@Override
|
@Override
|
||||||
public void pause() {
|
public void pause() {
|
||||||
// TODO
|
scheduler.pause();
|
||||||
indexBuilder.pause();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* (non-Javadoc)
|
|
||||||
*
|
|
||||||
* @see
|
|
||||||
* edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer#
|
|
||||||
* unpause()
|
|
||||||
*/
|
|
||||||
@Override
|
@Override
|
||||||
public void unpause() {
|
public void unpause() {
|
||||||
// TODO
|
scheduler.unpause();
|
||||||
indexBuilder.unpause();
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* (non-Javadoc)
|
|
||||||
*
|
|
||||||
* @see
|
|
||||||
* edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer#
|
|
||||||
* getStatus()
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public SearchIndexerStatus getStatus() {
|
|
||||||
// TODO
|
|
||||||
WorkLevelStamp workLevel = indexBuilder.getWorkLevel();
|
|
||||||
WorkLevel level = workLevel.getLevel();
|
|
||||||
Date since = workLevel.getSince();
|
|
||||||
if (level == WorkLevel.IDLE) {
|
|
||||||
return new SearchIndexerStatus(State.IDLE, since,
|
|
||||||
new SearchIndexerStatus.NoCounts());
|
|
||||||
} else {
|
|
||||||
return new SearchIndexerStatus(State.PROCESSING_URIS, since,
|
|
||||||
new SearchIndexerStatus.UriCounts(1, 2, 3, 6));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -170,63 +137,264 @@ public class SearchIndexerImpl implements SearchIndexer {
|
||||||
listeners.remove(listener);
|
listeners.remove(listener);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* (non-Javadoc)
|
|
||||||
*
|
|
||||||
* @see
|
|
||||||
* edu.cornell.mannlib.vitro.webapp.modules.Application.Component#shutdown
|
|
||||||
* (edu.cornell.mannlib.vitro.webapp.modules.Application)
|
|
||||||
*/
|
|
||||||
@Override
|
@Override
|
||||||
public void shutdown(Application application) {
|
public SearchIndexerStatus getStatus() {
|
||||||
// TODO
|
return taskQueue.getStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void shutdown(Application application) {
|
||||||
|
SearchIndexerStatus status = taskQueue.getStatus();
|
||||||
|
if (status.getState() != State.SHUTDOWN) {
|
||||||
|
listeners.fireEvent(new Event(SHUTDOWN_REQUESTED, status));
|
||||||
|
|
||||||
|
taskQueue.shutdown();
|
||||||
|
|
||||||
|
for (DocumentModifier dm : modifiers) {
|
||||||
|
try {
|
||||||
|
dm.shutdown();
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Failed to shut down document modifier " + dm, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
listeners.fireEvent(new Event(SHUTDOWN_COMPLETE, taskQueue
|
||||||
|
.getStatus()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// Helper classes
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A simple thread-safe list of event listeners.
|
* A simple thread-safe list of event listeners. All methods are
|
||||||
|
* synchronized.
|
||||||
*/
|
*/
|
||||||
private static class ListenerList {
|
public static class ListenerList {
|
||||||
private final List<Listener> list;
|
private final List<Listener> list;
|
||||||
|
|
||||||
public ListenerList() {
|
public ListenerList() {
|
||||||
list = Collections.synchronizedList(new ArrayList<Listener>());
|
list = new ArrayList<Listener>();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void add(Listener l) {
|
public synchronized void add(Listener l) {
|
||||||
list.add(l);
|
list.add(l);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void remove(Listener l) {
|
public synchronized void remove(Listener l) {
|
||||||
list.remove(l);
|
list.remove(l);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void fireEvent(Event e) {
|
public synchronized void fireEvent(Event event) {
|
||||||
synchronized (list) {
|
for (Listener l : list) {
|
||||||
for (Listener l : list) {
|
try {
|
||||||
l.receiveSearchIndexerEvent(e);
|
l.receiveSearchIndexerEvent(event);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Failed to deliver event '" + event
|
||||||
|
+ "' to listener '" + l + "'", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class BridgeListener implements IndexingEventListener {
|
/**
|
||||||
@Override
|
* A scheduler either collects tasks (if paused), or passes them to the
|
||||||
public void notifyOfIndexingEvent(EventTypes ie) {
|
* queue (if not paused). All methods are synchronized.
|
||||||
switch (ie) {
|
*/
|
||||||
case START_UPDATE:
|
private static class Scheduler {
|
||||||
createAndFire(START_PROCESSING_STATEMENTS);
|
private final TaskQueue taskQueue;
|
||||||
break;
|
private final List<Task> deferredQueue;
|
||||||
case FINISHED_UPDATE:
|
private volatile boolean paused;
|
||||||
createAndFire(STOP_PROCESSING_STATEMENTS);
|
|
||||||
break;
|
public Scheduler(TaskQueue taskQueue) {
|
||||||
case START_FULL_REBUILD:
|
this.taskQueue = taskQueue;
|
||||||
createAndFire(REBUILD_REQUESTED);
|
this.deferredQueue = new ArrayList<Task>();
|
||||||
createAndFire(START_PROCESSING_STATEMENTS);
|
}
|
||||||
break;
|
|
||||||
default: // FINISH_FULL_REBUILD
|
public synchronized void scheduleTask(Task task) {
|
||||||
createAndFire(STOP_PROCESSING_STATEMENTS);
|
if (paused) {
|
||||||
break;
|
deferredQueue.add(task);
|
||||||
|
log.debug("added task to deferred queue: " + task);
|
||||||
|
} else {
|
||||||
|
taskQueue.scheduleTask(task);
|
||||||
|
log.debug("added task to task queue: " + task);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void pause() {
|
||||||
|
paused = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void unpause() {
|
||||||
|
paused = false;
|
||||||
|
for (Task task : deferredQueue) {
|
||||||
|
taskQueue.scheduleTask(task);
|
||||||
|
log.debug("moved task from deferred queue to task queue: " + task);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A single-threaded task queue that can tell us the status of the current
|
||||||
|
* task.
|
||||||
|
*
|
||||||
|
* If no current task, it can return a status of IDLE or SHUTDOWN.
|
||||||
|
*/
|
||||||
|
private static class TaskQueue {
|
||||||
|
private final ExecutorService queue = Executors
|
||||||
|
.newSingleThreadExecutor(new VitroBackgroundThread.Factory(
|
||||||
|
"SearchIndexer_TaskQueue"));
|
||||||
|
|
||||||
|
private AtomicReference<QueueStatus> current = new AtomicReference<>(
|
||||||
|
new QueueStatus(SearchIndexerStatus.idle()));
|
||||||
|
|
||||||
|
public void scheduleTask(Task task) {
|
||||||
|
try {
|
||||||
|
queue.execute(new TaskWrapper(task));
|
||||||
|
} catch (RejectedExecutionException e) {
|
||||||
|
log.warn("Search Indexer task was rejected: " + e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchIndexerStatus getStatus() {
|
||||||
|
return current.get().getStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void shutdown() {
|
||||||
|
try {
|
||||||
|
queue.shutdownNow();
|
||||||
|
boolean terminated = queue.awaitTermination(1, MINUTES);
|
||||||
|
if (!terminated) {
|
||||||
|
log.warn("SearchIndexer task queue did not shut down "
|
||||||
|
+ "within 1 minute.");
|
||||||
|
}
|
||||||
|
current.set(new QueueStatus(SearchIndexerStatus.shutdown()));
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
log.warn("call to 'awaitTermination' was interrupted.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** When this wrapper is run, we will know the current task and status. */
|
||||||
|
private class TaskWrapper implements Runnable {
|
||||||
|
private final Task task;
|
||||||
|
|
||||||
|
public TaskWrapper(Task task) {
|
||||||
|
this.task = task;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
current.set(new QueueStatus(task));
|
||||||
|
log.debug("starting task: " + task);
|
||||||
|
task.run();
|
||||||
|
current.set(new QueueStatus(SearchIndexerStatus.idle()));
|
||||||
|
log.debug("ended task: " + task);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Either a specific status or a task to interrogate. */
|
||||||
|
private class QueueStatus {
|
||||||
|
private final Task task;
|
||||||
|
private final SearchIndexerStatus status;
|
||||||
|
|
||||||
|
public QueueStatus(Task task) {
|
||||||
|
this.task = Objects.requireNonNull(task);
|
||||||
|
this.status = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public QueueStatus(SearchIndexerStatus status) {
|
||||||
|
this.task = null;
|
||||||
|
this.status = Objects.requireNonNull(status);
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchIndexerStatus getStatus() {
|
||||||
|
if (task != null) {
|
||||||
|
return task.getStatus();
|
||||||
|
} else {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static interface Task extends Runnable {
|
||||||
|
public SearchIndexerStatus getStatus();
|
||||||
|
|
||||||
|
public void notifyWorkUnitCompletion(Runnable workUnit);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A thread pool for handling many small units of work submitted by a task.
|
||||||
|
*
|
||||||
|
* The task is notified as each unit completes.
|
||||||
|
*
|
||||||
|
* Only one task is active at a time, so the task can simply wait until this
|
||||||
|
* pool is idle to know that all of its units have completed.
|
||||||
|
*
|
||||||
|
* When shutting down, no attempt is made to interrupt the currently
|
||||||
|
* executing work units, since they are assumed to be small.
|
||||||
|
*/
|
||||||
|
public static class WorkerThreadPool {
|
||||||
|
private final ThreadPoolExecutor pool;
|
||||||
|
|
||||||
|
public WorkerThreadPool() {
|
||||||
|
this.pool = new ThreadPoolExecutor(10, 10, 10, TimeUnit.SECONDS,
|
||||||
|
new ArrayBlockingQueue<Runnable>(50),
|
||||||
|
new VitroBackgroundThread.Factory(
|
||||||
|
"SearchIndexer_ThreadPool"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void submit(Runnable workUnit, Task task) {
|
||||||
|
pool.execute(new WorkUnitWrapper(workUnit, task));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void waitUntilIdle() {
|
||||||
|
for (int i = 0; i < 60; i++) {
|
||||||
|
if (pool.getActiveCount() == 0) {
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void shutdown() {
|
||||||
|
pool.shutdown();
|
||||||
|
try {
|
||||||
|
boolean terminated = pool.awaitTermination(1, MINUTES);
|
||||||
|
if (!terminated) {
|
||||||
|
log.warn("SearchIndexer thread pool did not shut down "
|
||||||
|
+ "within 1 minute.");
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class WorkUnitWrapper implements Runnable {
|
||||||
|
private final Runnable workUnit;
|
||||||
|
private final Task task;
|
||||||
|
|
||||||
|
public WorkUnitWrapper(Runnable workUnit, Task task) {
|
||||||
|
this.workUnit = workUnit;
|
||||||
|
this.task = task;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
workUnit.run();
|
||||||
|
} finally {
|
||||||
|
task.notifyWorkUnitCompletion(workUnit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,7 +6,6 @@ import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames.DISPLAY;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import javax.servlet.ServletContext;
|
import javax.servlet.ServletContext;
|
||||||
import javax.servlet.ServletContextEvent;
|
import javax.servlet.ServletContextEvent;
|
||||||
|
@ -26,11 +25,8 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||||
import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess;
|
import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess;
|
||||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.SearchIndexer;
|
import edu.cornell.mannlib.vitro.webapp.search.SearchIndexer;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.IndividualToSearchDocument;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.SearchReindexingListener;
|
import edu.cornell.mannlib.vitro.webapp.search.indexing.SearchReindexingListener;
|
||||||
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinder;
|
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinder;
|
||||||
import edu.cornell.mannlib.vitro.webapp.startup.ComponentStartupStatusImpl;
|
import edu.cornell.mannlib.vitro.webapp.startup.ComponentStartupStatusImpl;
|
||||||
import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus;
|
import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus;
|
||||||
|
@ -40,8 +36,8 @@ import edu.cornell.mannlib.vitro.webapp.utils.developer.Key;
|
||||||
import edu.cornell.mannlib.vitro.webapp.utils.developer.listeners.DeveloperDisabledModelChangeListener;
|
import edu.cornell.mannlib.vitro.webapp.utils.developer.listeners.DeveloperDisabledModelChangeListener;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TODO
|
* TODO A silly implementation that just wraps the old IndexBuilder with a new
|
||||||
* A silly implementation that just wraps the old IndexBuilder with a new SearchIndexerImpl.
|
* SearchIndexerImpl.
|
||||||
*/
|
*/
|
||||||
public class SearchIndexerSetup implements ServletContextListener {
|
public class SearchIndexerSetup implements ServletContextListener {
|
||||||
private static final Log log = LogFactory.getLog(SearchIndexerSetup.class);
|
private static final Log log = LogFactory.getLog(SearchIndexerSetup.class);
|
||||||
|
@ -58,76 +54,52 @@ public class SearchIndexerSetup implements ServletContextListener {
|
||||||
|
|
||||||
ServletContext context = sce.getServletContext();
|
ServletContext context = sce.getServletContext();
|
||||||
StartupStatus ss = StartupStatus.getBean(context);
|
StartupStatus ss = StartupStatus.getBean(context);
|
||||||
SearchEngine searchEngine = ApplicationUtils.instance().getSearchEngine();
|
SearchEngine searchEngine = ApplicationUtils.instance()
|
||||||
|
.getSearchEngine();
|
||||||
|
|
||||||
try {
|
{ // >>>>> TODO
|
||||||
IndividualToSearchDocument indToSearchDoc = setupTranslation();
|
try {
|
||||||
|
// /* setup search indexer */
|
||||||
/* setup search indexer */
|
// SearchIndexer searchIndexer = new SearchIndexer(searchEngine,
|
||||||
SearchIndexer searchIndexer = new SearchIndexer(searchEngine, indToSearchDoc);
|
// indToSearchDoc);
|
||||||
|
//
|
||||||
// This is where the builder gets the list of places to try to
|
// // Make the IndexBuilder
|
||||||
// get objects to index. It is filtered so that non-public text
|
// IndexBuilder builder = new IndexBuilder(searchIndexer, wadf,
|
||||||
// does not get into the search index.
|
// uriFinders);
|
||||||
WebappDaoFactory wadf = ModelAccess.on(context)
|
//
|
||||||
.getWebappDaoFactory();
|
// // Create listener to notify index builder of changes to model
|
||||||
VitroFilters vf = VitroFilterUtils.getPublicFilter(context);
|
// // (can be disabled by developer setting.)
|
||||||
wadf = new WebappDaoFactoryFiltering(wadf, vf);
|
// ModelContext
|
||||||
|
// .registerListenerForChanges(
|
||||||
// make objects that will find additional URIs for context nodes etc
|
// context,
|
||||||
List<IndexingUriFinder> uriFinders = loadUriFinders();
|
// new DeveloperDisabledModelChangeListener(
|
||||||
|
// new SearchReindexingListener(builder),
|
||||||
// Make the IndexBuilder
|
// Key.SEARCH_INDEX_SUPPRESS_MODEL_CHANGE_LISTENER));
|
||||||
IndexBuilder builder = new IndexBuilder(searchIndexer, wadf,
|
//
|
||||||
uriFinders);
|
// ss.info(this, "Setup of search indexer completed.");
|
||||||
// Save it to the servlet context so we can access it later in the
|
//
|
||||||
// webapp.
|
} catch (Throwable e) {
|
||||||
context.setAttribute(IndexBuilder.class.getName(), builder);
|
ss.fatal(this, "could not setup search engine", e);
|
||||||
|
}
|
||||||
// Create listener to notify index builder of changes to model
|
|
||||||
// (can be disabled by developer setting.)
|
|
||||||
ModelContext.registerListenerForChanges(context,
|
|
||||||
new DeveloperDisabledModelChangeListener(
|
|
||||||
new SearchReindexingListener(builder),
|
|
||||||
Key.SEARCH_INDEX_SUPPRESS_MODEL_CHANGE_LISTENER));
|
|
||||||
|
|
||||||
ss.info(this, "Setup of search indexer completed.");
|
|
||||||
|
|
||||||
ApplicationUtils.instance().getSearchIndexer().startup(ApplicationUtils.instance(), new ComponentStartupStatusImpl(this, ss));
|
|
||||||
} catch (Throwable e) {
|
|
||||||
ss.fatal(this, "could not setup search engine", e);
|
|
||||||
}
|
}
|
||||||
|
ApplicationUtils
|
||||||
|
.instance()
|
||||||
|
.getSearchIndexer()
|
||||||
|
.startup(ApplicationUtils.instance(),
|
||||||
|
new ComponentStartupStatusImpl(this, ss));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void contextDestroyed(ServletContextEvent sce) {
|
public void contextDestroyed(ServletContextEvent sce) {
|
||||||
IndexBuilder builder = (IndexBuilder) sce.getServletContext()
|
ApplicationUtils.instance().getSearchIndexer()
|
||||||
.getAttribute(IndexBuilder.class.getName());
|
.shutdown(ApplicationUtils.instance());
|
||||||
if (builder != null)
|
|
||||||
builder.stopIndexingThread();
|
|
||||||
|
|
||||||
}
|
{ // >>>>> TODO
|
||||||
|
IndexBuilder builder = (IndexBuilder) sce.getServletContext()
|
||||||
private IndividualToSearchDocument setupTranslation() {
|
.getAttribute(IndexBuilder.class.getName());
|
||||||
try {
|
if (builder != null)
|
||||||
Set<SearchIndexExcluder> excluders = beanLoader.loadAll(SearchIndexExcluder.class);
|
builder.stopIndexingThread();
|
||||||
log.debug("Excludes: (" + excluders.size() + ") " + excluders);
|
|
||||||
|
|
||||||
Set<DocumentModifier> modifiers = beanLoader.loadAll(DocumentModifier.class);
|
|
||||||
log.debug("Modifiers: (" + modifiers.size() + ") " + modifiers);
|
|
||||||
|
|
||||||
return new IndividualToSearchDocument(new ArrayList<>(excluders), new ArrayList<>(modifiers));
|
|
||||||
} catch (ConfigurationBeanLoaderException e) {
|
|
||||||
throw new RuntimeException("Failed to configure the SearchIndexer", e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<IndexingUriFinder> loadUriFinders() {
|
|
||||||
try {
|
|
||||||
return new ArrayList<>(beanLoader.loadAll(IndexingUriFinder.class));
|
|
||||||
} catch (ConfigurationBeanLoaderException e) {
|
|
||||||
throw new RuntimeException("Failed to configure the SearchIndexer", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,7 @@ import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
|
||||||
*/
|
*/
|
||||||
public class ExcludeBasedOnType implements SearchIndexExcluder {
|
public class ExcludeBasedOnType implements SearchIndexExcluder {
|
||||||
|
|
||||||
private static final String SKIP_MSG = "skipping due to type.";
|
private static final String SKIP_MSG = "skipping due to type: ";
|
||||||
|
|
||||||
private final Set<String> typeURIs = new HashSet<>();
|
private final Set<String> typeURIs = new HashSet<>();
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ public class ExcludeBasedOnType implements SearchIndexExcluder {
|
||||||
return DONT_EXCLUDE;
|
return DONT_EXCLUDE;
|
||||||
}
|
}
|
||||||
if (typeURIinExcludeList(ind.getVClass())) {
|
if (typeURIinExcludeList(ind.getVClass())) {
|
||||||
return SKIP_MSG;
|
return SKIP_MSG + ind.getVClass();
|
||||||
}
|
}
|
||||||
|
|
||||||
List<VClass> vclasses = new ArrayList<>();
|
List<VClass> vclasses = new ArrayList<>();
|
||||||
|
@ -44,7 +44,7 @@ public class ExcludeBasedOnType implements SearchIndexExcluder {
|
||||||
|
|
||||||
for (VClass vclz : vclasses) {
|
for (VClass vclz : vclasses) {
|
||||||
if (typeURIinExcludeList(vclz))
|
if (typeURIinExcludeList(vclz))
|
||||||
return SKIP_MSG;
|
return SKIP_MSG + vclz;
|
||||||
}
|
}
|
||||||
|
|
||||||
return DONT_EXCLUDE;
|
return DONT_EXCLUDE;
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.searchindex.tasks;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TODO
|
||||||
|
*/
|
||||||
|
public class RebuildIndexTask implements Task {
|
||||||
|
|
||||||
|
/* (non-Javadoc)
|
||||||
|
* @see java.lang.Runnable#run()
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
throw new RuntimeException("RebuildIndexTask.run() not implemented.");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* (non-Javadoc)
|
||||||
|
* @see edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task#getStatus()
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public SearchIndexerStatus getStatus() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
throw new RuntimeException(
|
||||||
|
"RebuildIndexTask.getStatus() not implemented.");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void notifyWorkUnitCompletion(Runnable workUnit) {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
throw new RuntimeException("RebuildIndexTask.notifyWorkUnitCompletion() not implemented.");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,195 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.searchindex.tasks;
|
||||||
|
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.ALLTEXT;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.ALLTEXTUNSTEMMED;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.CLASSGROUP_URI;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.DOCID;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.INDEXEDTIME;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.MOST_SPECIFIC_TYPE_URIS;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_LOWERCASE_SINGLE_VALUED;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.RDFTYPE;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.URI;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.joda.time.DateTime;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
|
||||||
|
import com.hp.hpl.jena.vocabulary.OWL;
|
||||||
|
import com.hp.hpl.jena.vocabulary.RDFS;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.application.ApplicationUtils;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerUtils;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier;
|
||||||
|
|
||||||
|
public class UpdateDocumentWorkUnit implements Runnable {
|
||||||
|
private static final Log log = LogFactory
|
||||||
|
.getLog(UpdateDocumentWorkUnit.class);
|
||||||
|
|
||||||
|
private static final String URI_OWL_THING = OWL.Thing.getURI();
|
||||||
|
private static final String URI_DIFFERENT_FROM = OWL.differentFrom.getURI();
|
||||||
|
private static final String URI_RDFS_LABEL = RDFS.label.getURI();
|
||||||
|
|
||||||
|
private final Individual ind;
|
||||||
|
private final List<DocumentModifier> modifiers;
|
||||||
|
private final SearchEngine searchEngine;
|
||||||
|
|
||||||
|
public UpdateDocumentWorkUnit(Individual ind,
|
||||||
|
Collection<DocumentModifier> modifiers) {
|
||||||
|
this.ind = ind;
|
||||||
|
this.modifiers = new ArrayList<>(modifiers);
|
||||||
|
this.searchEngine = ApplicationUtils.instance().getSearchEngine();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Individual getInd() {
|
||||||
|
return ind;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
SearchInputDocument doc = searchEngine.createInputDocument();
|
||||||
|
|
||||||
|
addIdAndUri(doc);
|
||||||
|
addLabel(doc);
|
||||||
|
addClasses(doc);
|
||||||
|
addMostSpecificTypes(doc);
|
||||||
|
addObjectPropertyText(doc);
|
||||||
|
addDataPropertyText(doc);
|
||||||
|
addEntityBoost(doc);
|
||||||
|
|
||||||
|
for (DocumentModifier modifier : modifiers) {
|
||||||
|
modifier.modifyDocument(ind, doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
addIndexedTime(doc);
|
||||||
|
|
||||||
|
searchEngine.add(doc);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Failed to add '" + ind + "' to the search index.", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addIdAndUri(SearchInputDocument doc) {
|
||||||
|
doc.addField(DOCID, SearchIndexerUtils.getIdForUri(ind.getURI()));
|
||||||
|
doc.addField(URI, ind.getURI());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addLabel(SearchInputDocument doc) {
|
||||||
|
String name = ind.getRdfsLabel();
|
||||||
|
if (name == null) {
|
||||||
|
name = ind.getLocalName();
|
||||||
|
}
|
||||||
|
|
||||||
|
doc.addField(NAME_RAW, name);
|
||||||
|
doc.addField(NAME_LOWERCASE_SINGLE_VALUED, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For each class that the individual belongs to, record the class URI, the
|
||||||
|
* class group URI, the class Name, and the class boost.
|
||||||
|
*/
|
||||||
|
private void addClasses(SearchInputDocument doc) {
|
||||||
|
List<VClass> vclasses = ind.getVClasses(false);
|
||||||
|
if (vclasses == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (VClass clz : vclasses) {
|
||||||
|
String classUri = clz.getURI();
|
||||||
|
if (classUri == null || URI_OWL_THING.equals(classUri)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
doc.addField(RDFTYPE, classUri);
|
||||||
|
|
||||||
|
String classGroupUri = clz.getGroupURI();
|
||||||
|
if (classGroupUri != null) {
|
||||||
|
doc.addField(CLASSGROUP_URI, classGroupUri);
|
||||||
|
}
|
||||||
|
|
||||||
|
addToAlltext(doc, clz.getName());
|
||||||
|
|
||||||
|
Float boost = clz.getSearchBoost();
|
||||||
|
if (boost != null) {
|
||||||
|
doc.setDocumentBoost(doc.getDocumentBoost() + boost);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addMostSpecificTypes(SearchInputDocument doc) {
|
||||||
|
List<String> mstURIs = ind.getMostSpecificTypeURIs();
|
||||||
|
if (mstURIs != null) {
|
||||||
|
for (String typeURI : mstURIs) {
|
||||||
|
if (StringUtils.isNotBlank(typeURI)) {
|
||||||
|
doc.addField(MOST_SPECIFIC_TYPE_URIS, typeURI);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addObjectPropertyText(SearchInputDocument doc) {
|
||||||
|
List<ObjectPropertyStatement> stmts = ind.getObjectPropertyStatements();
|
||||||
|
if (stmts == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (ObjectPropertyStatement stmt : stmts) {
|
||||||
|
if (URI_DIFFERENT_FROM.equals(stmt.getPropertyURI())) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
addToAlltext(doc, stmt.getObject().getRdfsLabel());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addDataPropertyText(SearchInputDocument doc) {
|
||||||
|
List<DataPropertyStatement> stmts = ind.getDataPropertyStatements();
|
||||||
|
if (stmts == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (DataPropertyStatement stmt : stmts) {
|
||||||
|
if (stmt.getDatapropURI().equals(URI_RDFS_LABEL)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
addToAlltext(doc, stmt.getData());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addEntityBoost(SearchInputDocument doc) {
|
||||||
|
Float boost = ind.getSearchBoost();
|
||||||
|
if(boost != null && ! boost.equals(0.0F)) {
|
||||||
|
doc.setDocumentBoost(boost);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addIndexedTime(SearchInputDocument doc) {
|
||||||
|
doc.addField(INDEXEDTIME, (Object) new DateTime().getMillis());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addToAlltext(SearchInputDocument doc, String raw) {
|
||||||
|
if (StringUtils.isBlank(raw)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
String clean = Jsoup.parse(raw).text();
|
||||||
|
if (StringUtils.isBlank(clean)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
doc.addField(ALLTEXT, clean);
|
||||||
|
doc.addField(ALLTEXTUNSTEMMED, clean);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,199 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.searchindex.tasks;
|
||||||
|
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.START_PROCESSING_URIS;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.STOP_PROCESSING_URIS;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.State.PROCESSING_URIS;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.application.ApplicationUtils;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.IndividualDao;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.UriCounts;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerUtils;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.ListenerList;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a list of URIs, remove the ones that don't belong in the index and
|
||||||
|
* update the ones that do belong.
|
||||||
|
*
|
||||||
|
* A URI doesn't belong in the index if there is no individual with that URI, or
|
||||||
|
* if the individual has no VClasses assigned to it, or if the individual is
|
||||||
|
* excluded by one of the excluders.
|
||||||
|
*
|
||||||
|
* Deletions are done synchronously, but updates are scheduled to run on the
|
||||||
|
* thread pool.
|
||||||
|
*/
|
||||||
|
public class UpdateUrisTask implements Task {
|
||||||
|
private static final Log log = LogFactory.getLog(UpdateUrisTask.class);
|
||||||
|
|
||||||
|
private final Set<String> uris;
|
||||||
|
private final IndividualDao indDao;
|
||||||
|
private final List<SearchIndexExcluder> excluders;
|
||||||
|
private final List<DocumentModifier> modifiers;
|
||||||
|
private final ListenerList listeners;
|
||||||
|
private final WorkerThreadPool pool;
|
||||||
|
|
||||||
|
private final Status status;
|
||||||
|
private final SearchEngine searchEngine;
|
||||||
|
|
||||||
|
public UpdateUrisTask(Collection<String> uris,
|
||||||
|
Collection<SearchIndexExcluder> excluders,
|
||||||
|
Collection<DocumentModifier> modifiers, IndividualDao indDao,
|
||||||
|
ListenerList listeners, WorkerThreadPool pool) {
|
||||||
|
this.uris = new HashSet<>(uris);
|
||||||
|
this.excluders = new ArrayList<>(excluders);
|
||||||
|
this.modifiers = new ArrayList<>(modifiers);
|
||||||
|
this.indDao = indDao;
|
||||||
|
this.listeners = listeners;
|
||||||
|
this.pool = pool;
|
||||||
|
|
||||||
|
this.status = new Status(uris.size());
|
||||||
|
|
||||||
|
this.searchEngine = ApplicationUtils.instance().getSearchEngine();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
listeners.fireEvent(new Event(START_PROCESSING_URIS, status
|
||||||
|
.getSearchIndexerStatus()));
|
||||||
|
for (String uri : uris) {
|
||||||
|
if (isInterrupted()) {
|
||||||
|
log.info("Interrupted: " + status.getSearchIndexerStatus());
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
Individual ind = getIndividual(uri);
|
||||||
|
if (ind == null || hasNoClass(ind) || isExcluded(ind)) {
|
||||||
|
deleteDocument(uri);
|
||||||
|
} else {
|
||||||
|
updateDocument(ind);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pool.waitUntilIdle();
|
||||||
|
listeners.fireEvent(new Event(STOP_PROCESSING_URIS, status
|
||||||
|
.getSearchIndexerStatus()));
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isInterrupted() {
|
||||||
|
if (Thread.interrupted()) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Individual getIndividual(String uri) {
|
||||||
|
Individual ind = indDao.getIndividualByURI(uri);
|
||||||
|
if (ind == null) {
|
||||||
|
log.debug("Found no individual for '" + uri + "'");
|
||||||
|
}
|
||||||
|
return ind;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean hasNoClass(Individual ind) {
|
||||||
|
List<VClass> vclasses = ind.getVClasses(false);
|
||||||
|
if (vclasses == null || vclasses.isEmpty()) {
|
||||||
|
log.debug("Individual " + ind + " has no classes.");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isExcluded(Individual ind) {
|
||||||
|
for (SearchIndexExcluder excluder : excluders) {
|
||||||
|
String message = excluder.checkForExclusion(ind);
|
||||||
|
if (message != SearchIndexExcluder.DONT_EXCLUDE) {
|
||||||
|
log.debug("Excluded " + ind + " because " + message);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** A delete is fast enough to be done synchronously. */
|
||||||
|
private void deleteDocument(String uri) {
|
||||||
|
try {
|
||||||
|
searchEngine.deleteById(SearchIndexerUtils.getIdForUri(uri));
|
||||||
|
status.incrementDeletes();
|
||||||
|
log.debug("deleted '" + uri + "' from search index.");
|
||||||
|
} catch (SearchEngineException e) {
|
||||||
|
log.warn("Failed to delete '" + uri + "' from search index", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateDocument(Individual ind) {
|
||||||
|
Runnable workUnit = new UpdateDocumentWorkUnit(ind, modifiers);
|
||||||
|
pool.submit(workUnit, this);
|
||||||
|
log.debug("scheduled update to " + ind);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void notifyWorkUnitCompletion(Runnable workUnit) {
|
||||||
|
log.debug("completed update to "
|
||||||
|
+ ((UpdateDocumentWorkUnit) workUnit).getInd());
|
||||||
|
status.incrementUpdates();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SearchIndexerStatus getStatus() {
|
||||||
|
return status.getSearchIndexerStatus();
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// helper classes
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A thread-safe collection of status information. All methods are
|
||||||
|
* synchronized.
|
||||||
|
*/
|
||||||
|
private static class Status {
|
||||||
|
private final int total;
|
||||||
|
private int updated = 0;
|
||||||
|
private int deleted = 0;
|
||||||
|
private Date since = new Date();
|
||||||
|
|
||||||
|
public Status(int total) {
|
||||||
|
this.total = total;
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void incrementUpdates() {
|
||||||
|
updated++;
|
||||||
|
since = new Date();
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void incrementDeletes() {
|
||||||
|
deleted++;
|
||||||
|
since = new Date();
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized SearchIndexerStatus getSearchIndexerStatus() {
|
||||||
|
int remaining = total - updated - deleted;
|
||||||
|
return new SearchIndexerStatus(PROCESSING_URIS, since,
|
||||||
|
new UriCounts(deleted, updated, remaining, total));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Add table
Reference in a new issue