Changes to Entity2LuceneDoc
This commit is contained in:
parent
54fa664a24
commit
3843886b87
1 changed file with 105 additions and 150 deletions
|
@ -15,11 +15,14 @@ import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.DateTime;
|
||||||
|
|
||||||
|
import com.hp.hpl.jena.vocabulary.OWL;
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
|
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
|
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
|
import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||||
|
@ -55,8 +58,6 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
public static String NAMERAW = "nameraw";
|
public static String NAMERAW = "nameraw";
|
||||||
/** portal ( 2 ^ portalId ) */
|
/** portal ( 2 ^ portalId ) */
|
||||||
public static String PORTAL = "portal";
|
public static String PORTAL = "portal";
|
||||||
/** Flag 2 (legacy, only used at Cornell) */
|
|
||||||
public static String FLAG2 = "FLAG2";
|
|
||||||
/** time of index in msec since epoch */
|
/** time of index in msec since epoch */
|
||||||
public static String INDEXEDTIME= "indexedTime";
|
public static String INDEXEDTIME= "indexedTime";
|
||||||
/** timekey of entity in yyyymmddhhmm */
|
/** timekey of entity in yyyymmddhhmm */
|
||||||
|
@ -74,6 +75,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
public static final String KEYWORDS = "KEYWORDS";
|
public static final String KEYWORDS = "KEYWORDS";
|
||||||
/** Does the individual have a thumbnail image? 1=yes 0=no */
|
/** Does the individual have a thumbnail image? 1=yes 0=no */
|
||||||
public static final String THUMBNAIL = "THUMBNAIL";
|
public static final String THUMBNAIL = "THUMBNAIL";
|
||||||
|
/** Should individual be included in full text search results? 1=yes 0=no */
|
||||||
|
public static final String PROHIBITED_FROM_TEXT_RESULTS = "PROHIBITED_FROM_TEXT_RESULTS";
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(Entity2LuceneDoc.class.getName());
|
private static final Log log = LogFactory.getLog(Entity2LuceneDoc.class.getName());
|
||||||
|
@ -102,24 +105,64 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
|
|
||||||
//DocId
|
//DocId
|
||||||
String id = ent.getURI();
|
String id = ent.getURI();
|
||||||
|
log.debug("translating " + id);
|
||||||
|
|
||||||
if( id == null ){
|
if( id == null ){
|
||||||
log.debug("cannot translate bnodes");
|
log.debug("cannot add individuals without URIs to lucene index");
|
||||||
throw new IndexingException("Not indexing bnodes");
|
return null;
|
||||||
|
}else if( id.startsWith( VitroVocabulary.vitroURI )
|
||||||
|
|| id.startsWith( VitroVocabulary.VITRO_PUBLIC )
|
||||||
|
|| id.startsWith( VitroVocabulary.PSEUDO_BNODE_NS)
|
||||||
|
|| id.startsWith( OWL.NS ) ){
|
||||||
|
log.debug("not indxing because of namespace:" + id );
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Types and ClassGroup */
|
||||||
|
boolean prohibited = false;
|
||||||
List<VClass> vclasses = ent.getVClasses(false);
|
List<VClass> vclasses = ent.getVClasses(false);
|
||||||
for( VClass vclass : vclasses ){
|
for( VClass clz : vclasses){
|
||||||
if( classesProhibitedFromSearch.isClassProhibited(vclass.getURI()) ){
|
if( clz.getURI() == null ){
|
||||||
|
continue;
|
||||||
|
}else if( OWL.Thing.getURI().equals( clz.getURI()) ){
|
||||||
|
//index individuals of type owl:Thing, just don't add owl:Thing to the type field in the index
|
||||||
|
continue;
|
||||||
|
} else if ( clz.getURI().startsWith( OWL.NS ) ){
|
||||||
|
log.debug("not indexing " + id + " because of type " + clz.getURI());
|
||||||
return null;
|
return null;
|
||||||
|
}else if( clz.getURI().startsWith( VitroVocabulary.vitroURI )
|
||||||
|
|| clz.getURI().startsWith( VitroVocabulary.VITRO_PUBLIC )
|
||||||
|
|| clz.getURI().startsWith( VitroVocabulary.PSEUDO_BNODE_NS) ){
|
||||||
|
log.debug("not indexing " + id + " because of type " + clz.getURI());
|
||||||
|
return null;
|
||||||
|
}else{
|
||||||
|
if( !prohibited && classesProhibitedFromSearch.isClassProhibited(clz.getURI()) )
|
||||||
|
prohibited = true;
|
||||||
|
|
||||||
|
if( clz.getSearchBoost() != null )
|
||||||
|
doc.setBoost( doc.getBoost() + clz.getSearchBoost() );
|
||||||
|
|
||||||
|
doc.add( new Field(term.RDFTYPE, clz.getURI(),
|
||||||
|
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||||
|
|
||||||
|
if( clz.getName() != null )
|
||||||
|
classPublicNames = classPublicNames + " " + clz.getName();
|
||||||
|
|
||||||
|
//Classgroup URI
|
||||||
|
if( clz.getGroupURI() != null )
|
||||||
|
doc.add( new Field(term.CLASSGROUP_URI, clz.getGroupURI(),
|
||||||
|
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
doc.add( new Field(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0",
|
||||||
|
Field.Store.NO,Field.Index.NOT_ANALYZED_NO_NORMS) );
|
||||||
|
|
||||||
|
/* lucene DOCID */
|
||||||
doc.add( new Field(term.DOCID, entClassName + id,
|
doc.add( new Field(term.DOCID, entClassName + id,
|
||||||
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||||
|
|
||||||
//vitro Id
|
//vitro Id
|
||||||
doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||||
log.debug( id );
|
|
||||||
|
|
||||||
//java class
|
//java class
|
||||||
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||||
|
@ -153,23 +196,6 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
if( ent.getSearchBoost() != null && ent.getSearchBoost() != 0 )
|
if( ent.getSearchBoost() != null && ent.getSearchBoost() != 0 )
|
||||||
doc.setBoost(ent.getSearchBoost());
|
doc.setBoost(ent.getSearchBoost());
|
||||||
|
|
||||||
//rdf:type and ClassGroup
|
|
||||||
for( VClass clz : vclasses){
|
|
||||||
//document boost for given classes
|
|
||||||
if( clz.getSearchBoost() != null )
|
|
||||||
doc.setBoost( doc.getBoost() + clz.getSearchBoost() );
|
|
||||||
doc.add( new Field(term.RDFTYPE, clz.getURI(),
|
|
||||||
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
|
||||||
|
|
||||||
if( clz.getName() != null )
|
|
||||||
classPublicNames = classPublicNames + " " + clz.getName();
|
|
||||||
|
|
||||||
//Classgroup URI
|
|
||||||
if( clz.getGroupURI() != null )
|
|
||||||
doc.add( new Field(term.CLASSGROUP_URI, clz.getGroupURI(),
|
|
||||||
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
|
||||||
}
|
|
||||||
|
|
||||||
//Modification time
|
//Modification time
|
||||||
if( ent.getModTime() != null){
|
if( ent.getModTime() != null){
|
||||||
value = (new DateTime(ent.getModTime().getTime()))
|
value = (new DateTime(ent.getModTime().getTime()))
|
||||||
|
@ -191,13 +217,12 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
}catch (Exception ex){
|
}catch (Exception ex){
|
||||||
value = null;
|
value = null;
|
||||||
}
|
}
|
||||||
|
if( value != null )
|
||||||
if( value != null ){
|
|
||||||
doc.add( new Field(term.SUNRISE, value, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
doc.add( new Field(term.SUNRISE, value, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||||
}else{
|
else
|
||||||
doc.add(new Field(term.SUNRISE, earliestTime, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
doc.add(new Field(term.SUNRISE, earliestTime, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||||
}
|
|
||||||
|
|
||||||
|
/* Sunset */
|
||||||
try{
|
try{
|
||||||
value = null;
|
value = null;
|
||||||
if( ent.getSunset() != null ){
|
if( ent.getSunset() != null ){
|
||||||
|
@ -207,13 +232,12 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
}catch (Exception ex){
|
}catch (Exception ex){
|
||||||
value = null;
|
value = null;
|
||||||
}
|
}
|
||||||
|
if( value != null )
|
||||||
if( value != null ){
|
|
||||||
doc.add( new Field(term.SUNSET, value, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
doc.add( new Field(term.SUNSET, value, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||||
}else{
|
else
|
||||||
doc.add(new Field(term.SUNSET, latestTime, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
doc.add(new Field(term.SUNSET, latestTime, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||||
}
|
|
||||||
|
|
||||||
|
/* timekey */
|
||||||
try{
|
try{
|
||||||
value = null;
|
value = null;
|
||||||
if( ent.getTimekey() != null ){
|
if( ent.getTimekey() != null ){
|
||||||
|
@ -224,6 +248,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
log.error("could not save timekey " + ex);
|
log.error("could not save timekey " + ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* thumbnail */
|
||||||
try{
|
try{
|
||||||
value = null;
|
value = null;
|
||||||
if( ent.hasThumb() )
|
if( ent.hasThumb() )
|
||||||
|
@ -239,122 +264,52 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
|
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
|
||||||
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||||
|
|
||||||
//portal Flags
|
if( ! prohibited ){
|
||||||
doPortalFlags(ent, doc);
|
//ALLTEXT, all of the 'full text'
|
||||||
|
String t=null;
|
||||||
|
value ="";
|
||||||
|
value+= " "+( ((t=ent.getName()) == null)?"":t );
|
||||||
|
value+= " "+( ((t=ent.getAnchor()) == null)?"":t);
|
||||||
|
value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t );
|
||||||
|
value+= " "+ ( ((t=ent.getDescription()) == null)?"":t );
|
||||||
|
value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t );
|
||||||
|
value+= " "+ getKeyterms(ent);
|
||||||
|
|
||||||
//do flag 2 legacy, only used at Cornell
|
value+= " " + classPublicNames;
|
||||||
//doFlag2( ent, doc );
|
|
||||||
|
|
||||||
//ALLTEXT, all of the 'full text'
|
List<DataPropertyStatement> dataPropertyStatements = ent.getDataPropertyStatements();
|
||||||
String t=null;
|
if (dataPropertyStatements != null) {
|
||||||
value ="";
|
Iterator<DataPropertyStatement> dataPropertyStmtIter = dataPropertyStatements.iterator();
|
||||||
value+= " "+( ((t=ent.getName()) == null)?"":t );
|
while (dataPropertyStmtIter.hasNext()) {
|
||||||
value+= " "+( ((t=ent.getAnchor()) == null)?"":t);
|
DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next();
|
||||||
value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t );
|
value+= " "+ ( ((t=dataPropertyStmt.getData()) == null)?"":t );
|
||||||
value+= " "+ ( ((t=ent.getDescription()) == null)?"":t );
|
|
||||||
value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t );
|
|
||||||
value+= " "+ getKeyterms(ent);
|
|
||||||
|
|
||||||
value+= " " + classPublicNames;
|
|
||||||
|
|
||||||
List<DataPropertyStatement> dataPropertyStatements = ent.getDataPropertyStatements();
|
|
||||||
if (dataPropertyStatements != null) {
|
|
||||||
Iterator<DataPropertyStatement> dataPropertyStmtIter = dataPropertyStatements.iterator();
|
|
||||||
while (dataPropertyStmtIter.hasNext()) {
|
|
||||||
DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next();
|
|
||||||
value+= " "+ ( ((t=dataPropertyStmt.getData()) == null)?"":t );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
List<ObjectPropertyStatement> objectPropertyStatements = ent.getObjectPropertyStatements();
|
|
||||||
if (objectPropertyStatements != null) {
|
|
||||||
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
|
|
||||||
while (objectPropertyStmtIter.hasNext()) {
|
|
||||||
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
|
|
||||||
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
|
|
||||||
continue;
|
|
||||||
try {
|
|
||||||
value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t );
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.debug("could not index name of related object: " + e.getMessage());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
List<ObjectPropertyStatement> objectPropertyStatements = ent.getObjectPropertyStatements();
|
||||||
|
if (objectPropertyStatements != null) {
|
||||||
|
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
|
||||||
|
while (objectPropertyStmtIter.hasNext()) {
|
||||||
|
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
|
||||||
|
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
|
||||||
|
continue;
|
||||||
|
try {
|
||||||
|
value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t );
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("could not index name of related object: " + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//stemmed terms
|
||||||
|
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
|
||||||
|
//unstemmed terms
|
||||||
|
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
|
||||||
}
|
}
|
||||||
|
|
||||||
//what else? linkAnchors? externalIds?
|
//flagX and portal flags are no longer indexed.
|
||||||
|
|
||||||
//stemmed terms
|
|
||||||
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
|
|
||||||
//unstemmed terms
|
|
||||||
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
|
|
||||||
|
|
||||||
return doc;
|
return doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Flag two is a legacy field that is used only by Cornell.
|
|
||||||
* It is related to the old portal filtering.
|
|
||||||
*/
|
|
||||||
private void doFlag2(Individual ent, Document doc) {
|
|
||||||
String flag2Set = ent.getFlag2Set();
|
|
||||||
if( flag2Set != null && ! "".equals(flag2Set)){
|
|
||||||
for( String flag2Value : flag2Set.split(",")){
|
|
||||||
if( flag2Value != null ){
|
|
||||||
String value = flag2Value.replace(",", "");
|
|
||||||
if(!value.isEmpty())
|
|
||||||
doc.add( new Field(term.FLAG2, value, Field.Store.NO, Field.Index.ANALYZED));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Splits up the entity's flag1 value into portal id and then
|
|
||||||
* adds the id to the doc.
|
|
||||||
*
|
|
||||||
* This should work fine with blank portal id and entities with
|
|
||||||
* the portal set to NULL.
|
|
||||||
*
|
|
||||||
* @param ent
|
|
||||||
* @param doc
|
|
||||||
*/
|
|
||||||
@SuppressWarnings("static-access")
|
|
||||||
private void doPortalFlags(Individual ent, Document doc){
|
|
||||||
/* this is the code to add the portal names, we don't use this
|
|
||||||
* now but since there is no consistent way to store flags you
|
|
||||||
* might want this in the future.
|
|
||||||
String portalIdsInCommaSeperatedList = ent.getFlag1Set();
|
|
||||||
|
|
||||||
if(portalIdsInCommaSeperatedList == null) return;
|
|
||||||
String[] portalNames = portalIdsInCommaSeperatedList.split(",");
|
|
||||||
for( String name : portalNames){
|
|
||||||
doc.add( new Field(term.PORTAL,name,Field.Store.NO,Field.Index.NOT_ANALYZED));
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* this is the code to store portal ids to the lucene index */
|
|
||||||
if( ent.getFlag1Numeric() == 0 )
|
|
||||||
return;
|
|
||||||
Long[] portalIds = FlagMathUtils.numeric2numerics( ent.getFlag1Numeric() );
|
|
||||||
if( portalIds == null || portalIds.length == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
log.debug("Flag 1 numeric: " + ent.getFlag1Numeric() + " for " + ent.getURI());
|
|
||||||
|
|
||||||
long id = -1;
|
|
||||||
for( Long idLong : portalIds){
|
|
||||||
if( idLong != null ){
|
|
||||||
id = idLong.longValue();
|
|
||||||
String numericPortal = Long.toString(id);
|
|
||||||
if( numericPortal != null ){
|
|
||||||
doc.add( new Field(term.PORTAL,numericPortal,
|
|
||||||
Field.Store.NO, Field.Index.NOT_ANALYZED));
|
|
||||||
log.debug("adding portal " + numericPortal + " to " + ent.getURI());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@SuppressWarnings("static-access")
|
@SuppressWarnings("static-access")
|
||||||
public boolean canUnTranslate(Object result) {
|
public boolean canUnTranslate(Object result) {
|
||||||
if( result != null && result instanceof Document){
|
if( result != null && result instanceof Document){
|
||||||
|
|
Loading…
Add table
Reference in a new issue