Committing the following changes to dev-search-relevance branch
1) ContextNodesInclusionFactory NIHVIVO-2204, NIHVIVO-2333(partly) 2) IndividualToSolrDoc to replace Entity2LuceneDoc while constructing SolrInputDocuments NIHVIVO-2653 3) LuceneDocument (test utility that prints contents from Lucene/Solr documents)
This commit is contained in:
parent
4a209a12ce
commit
a3471b7102
12 changed files with 3613 additions and 2246 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -56,3 +56,122 @@ was
|
|||
will
|
||||
with
|
||||
|
||||
# these stopwords are taken
|
||||
# from http://www.onjava.com/pub/a/onjava/2003/01/15/lucene.html?page=2
|
||||
|
||||
about
|
||||
after
|
||||
all
|
||||
also
|
||||
an
|
||||
and
|
||||
another
|
||||
any
|
||||
are
|
||||
as
|
||||
at
|
||||
be
|
||||
because
|
||||
been
|
||||
before
|
||||
being
|
||||
between
|
||||
both
|
||||
but
|
||||
by
|
||||
came
|
||||
can
|
||||
come
|
||||
could
|
||||
did
|
||||
do
|
||||
does
|
||||
each
|
||||
else
|
||||
for
|
||||
from
|
||||
get
|
||||
got
|
||||
has
|
||||
had
|
||||
he
|
||||
have
|
||||
her
|
||||
here
|
||||
him
|
||||
himself
|
||||
his
|
||||
how
|
||||
if
|
||||
in
|
||||
into
|
||||
is
|
||||
it
|
||||
its
|
||||
just
|
||||
like
|
||||
make
|
||||
many
|
||||
me
|
||||
might
|
||||
more
|
||||
most
|
||||
much
|
||||
must
|
||||
my
|
||||
never
|
||||
now
|
||||
of
|
||||
on
|
||||
only
|
||||
or
|
||||
other
|
||||
our
|
||||
out
|
||||
over
|
||||
re
|
||||
said
|
||||
same
|
||||
see
|
||||
should
|
||||
since
|
||||
so
|
||||
some
|
||||
still
|
||||
such
|
||||
take
|
||||
than
|
||||
that
|
||||
the
|
||||
their
|
||||
them
|
||||
then
|
||||
there
|
||||
these
|
||||
they
|
||||
this
|
||||
those
|
||||
through
|
||||
to
|
||||
too
|
||||
under
|
||||
up
|
||||
use
|
||||
very
|
||||
want
|
||||
was
|
||||
way
|
||||
we
|
||||
well
|
||||
were
|
||||
what
|
||||
when
|
||||
where
|
||||
which
|
||||
while
|
||||
who
|
||||
will
|
||||
with
|
||||
would
|
||||
you
|
||||
your
|
||||
|
|
|
@ -24,6 +24,9 @@ public class DisplayVocabulary {
|
|||
/* Individuals */
|
||||
public static final String PRIMARY_LUCENE_INDEX_URI = NS + "PrimaryLuceneIndex";
|
||||
|
||||
//bk392 for extracting properties beyond context nodes.
|
||||
public static final String CONTEXT_NODES_URI = NS + "QueryForContextNodes";
|
||||
|
||||
/* Page types */
|
||||
public static final String PAGE_TYPE = NS + "Page";
|
||||
public static final String HOME_PAGE_TYPE = NS + "HomePage";
|
||||
|
@ -35,8 +38,10 @@ public class DisplayVocabulary {
|
|||
/* Data Properties */
|
||||
public static final DatatypeProperty URL_MAPPING = m_model.createDatatypeProperty(NS + "urlMapping");
|
||||
public static final String TITLE = NS + "title";
|
||||
public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate");
|
||||
|
||||
public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate");
|
||||
//bk392 for extracting properties beyond context nodes.
|
||||
public static final DatatypeProperty QUERY_FOR_EDUCATIONAL_TRAINING = m_model.createDatatypeProperty(NS + "queryForEducationalTraining");
|
||||
|
||||
/* URIs for storing menu.n3 */
|
||||
public static final String MENU_TEXT_RES = NS + "MenuText";
|
||||
public static final String HAS_TEXT_REPRESENTATION = NS + "hasMenuText";
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
package edu.cornell.mannlib.vitro.webapp.search;
|
||||
|
||||
/**
 * Names of the fields (terms) used in the search index documents.
 *
 * All names are compile-time constants. They are declared {@code final} so
 * that no code can accidentally re-point a field name at runtime.
 */
public class VitroTermNames {
    /** Id of entity, vclass or tab */
    public static final String URI = "URI";
    /** lucene document id */
    public static final String DOCID = "DocId";
    /** java class of the object that the Doc represents. */
    public static final String JCLASS = "JCLASS";
    /** rdf:type */
    public static final String RDFTYPE = "type";
    /** class group of the individual's type (presumably its URI, going by the constant name) */
    public static final String CLASSGROUP_URI = "classgroup";
    /** Modtime from db */
    public static final String MODTIME = "modTime";

    /** time of index in msec since epoch */
    public static final String INDEXEDTIME = "indexedTime";
    /** timekey of entity in yyyymmddhhmm */
    public static final String TIMEKEY = "TIMEKEY";
    /** time of sunset/end of entity in yyyymmddhhmm */
    public static final String SUNSET = "SUNSET";
    /** time of sunrise/start of entity in yyyymmddhhmm */
    public static final String SUNRISE = "SUNRISE";
    /** entity's moniker */
    public static final String MONIKER = "moniker";
    /** text for 'full text' search, this is stemmed */
    public static final String ALLTEXT = "ALLTEXT";
    /** text for 'full text' search, this is unstemmed for
     * use with wildcards and prefix queries */
    public static final String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
    /** class name for storing context nodes **/
    public static final String CONTEXTNODE = "contextNode";
    /** keywords */
    public static final String KEYWORDS = "KEYWORDS";
    /** Does the individual have a thumbnail image? 1=yes 0=no */
    public static final String THUMBNAIL = "THUMBNAIL";
    /** Should individual be included in full text search results? 1=yes 0=no */
    public static final String PROHIBITED_FROM_TEXT_RESULTS = "PROHIBITED_FROM_TEXT_RESULTS";
    /** class names in human readable form of an individual, lowercased (going by the constant name) */
    public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase";
    /** class names in human readable form of an individual */
    public static final String CLASSLOCALNAME = "classLocalName";

    // Fields derived from rdfs:label
    /** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/
    public static final String NAME_RAW = "nameRaw"; // was NAMERAW

    /** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/
    public static final String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE

    /** rdfs:label lowercased, tokenized, stop words, no stemming **/
    public static final String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED

    /** rdfs:label lowercased, tokenized, stop words, stemmed **/
    public static final String NAME_STEMMED = "nameStemmed"; // was NAME
}
|
|
@ -0,0 +1,637 @@
|
|||
package edu.cornell.mannlib.vitro.webapp.search.beans;
|
||||
|
||||
import javax.servlet.ServletContext;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.hp.hpl.jena.ontology.OntModel;
|
||||
import com.hp.hpl.jena.query.Query;
|
||||
import com.hp.hpl.jena.query.QueryExecution;
|
||||
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
||||
import com.hp.hpl.jena.query.QueryFactory;
|
||||
import com.hp.hpl.jena.query.QuerySolution;
|
||||
import com.hp.hpl.jena.query.QuerySolutionMap;
|
||||
import com.hp.hpl.jena.query.ResultSet;
|
||||
import com.hp.hpl.jena.query.Syntax;
|
||||
import com.hp.hpl.jena.rdf.model.Literal;
|
||||
import com.hp.hpl.jena.rdf.model.RDFNode;
|
||||
import com.hp.hpl.jena.rdf.model.Resource;
|
||||
import com.hp.hpl.jena.rdf.model.ResourceFactory;
|
||||
import com.hp.hpl.jena.shared.Lock;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||
|
||||
public class ContextNodesInclusionFactory {
|
||||
|
||||
private OntModel fullModel;
|
||||
private String contextNodeURI;
|
||||
private String query = "";
|
||||
|
||||
private static final String queryForEducationalTraining = "SELECT ?query WHERE {" +
|
||||
"?searchConfig <"+ DisplayVocabulary.QUERY_FOR_EDUCATIONAL_TRAINING + "> ?query . }";
|
||||
|
||||
private static Log log = LogFactory.getLog(ContextNodesInclusionFactory.class);
|
||||
|
||||
public ContextNodesInclusionFactory(String contextNodeURI,
|
||||
OntModel displayOntModel, ServletContext context) {
|
||||
this.fullModel = ModelContext.getJenaOntModel(context);
|
||||
this.contextNodeURI = contextNodeURI;
|
||||
query = getQueryFromModel(contextNodeURI, displayOntModel);
|
||||
}
|
||||
|
||||
private String getQueryFromModel(String uri, OntModel displayOntModel) {
|
||||
|
||||
String resultQuery = "";
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource searchConfig = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("searchConfig", searchConfig);
|
||||
|
||||
Query query = QueryFactory.create(queryForEducationalTraining);
|
||||
displayOntModel.enterCriticalSection(Lock.READ);
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(query, displayOntModel, initialBinding);
|
||||
try{
|
||||
ResultSet results = qExec.execSelect();
|
||||
while(results.hasNext()){
|
||||
QuerySolution soln = results.nextSolution();
|
||||
Literal node = soln.getLiteral("query");
|
||||
if(node.isLiteral()){
|
||||
resultQuery = node.toString();
|
||||
}else{
|
||||
log.warn("unexpected literal in the object position for context node queries " + node.toString());
|
||||
}
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
displayOntModel.leaveCriticalSection();
|
||||
}
|
||||
|
||||
return resultQuery.substring(0, resultQuery.length() - 3);
|
||||
}
|
||||
|
||||
|
||||
// public List<Field> getFieldValues(String uri, Model modelToQuery, List<String> queries){
|
||||
|
||||
//what do the queries need to be like?
|
||||
// SELECT ?field ?value WHERE ....
|
||||
|
||||
// what to do with multiple values for a field?
|
||||
|
||||
// }
|
||||
|
||||
|
||||
|
||||
//in different object:
|
||||
/*
|
||||
* get queries from somewhere
|
||||
* get model to run queries on
|
||||
* get list of individuals
|
||||
* for each individual:
|
||||
* fields = getFieldValues(uri, model, queiries)
|
||||
* index(fields)?
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
public String getPropertiesAssociatedWithPosition(String uri){
|
||||
|
||||
StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
String thisQuery = prefix +
|
||||
"SELECT " +
|
||||
"(str(?HRJobTitle) as ?hrJobTitle) (str(?InvolvedOrganizationName) as ?involvedOrganizationName) " +
|
||||
" (str(?PositionForPerson) as ?positionForPerson) (str(?PositionInOrganization) as ?positionInOrganization) " +
|
||||
" (str(?TitleOrRole) as ?titleOrRole) (str(?PositionLabel) as ?positionLabel) WHERE {"
|
||||
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:Position . "
|
||||
|
||||
+ " OPTIONAL { ?c core:hrJobTitle ?HRJobTitle . } . "
|
||||
+ " OPTIONAL { ?c core:involvedOrganizationName ?InvolvedOrganizationName . } ."
|
||||
+ " OPTIONAL { ?c core:positionForPerson ?f . ?f rdfs:label ?PositionForPerson . } . "
|
||||
+ " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . "
|
||||
+ " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . "
|
||||
+ " OPTIONAL { ?c rdfs:label ?PositionLabel . } "
|
||||
|
||||
+ " } ORDER BY ?PositionLabel ";
|
||||
|
||||
Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ);
|
||||
fullModel.enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding);
|
||||
try{
|
||||
|
||||
ResultSet results = qExec.execSelect();
|
||||
|
||||
while(results.hasNext()){
|
||||
|
||||
QuerySolution soln = results.nextSolution();
|
||||
|
||||
RDFNode hrJobTitle = soln.get("hrJobTitle");
|
||||
if(hrJobTitle != null){
|
||||
propertyValues.append(" " + hrJobTitle.toString());
|
||||
}else{
|
||||
log.warn("hrJobTitle is null ");
|
||||
}
|
||||
|
||||
RDFNode involvedOrganizationName = soln.get("involvedOrganizationName");
|
||||
if(involvedOrganizationName != null){
|
||||
propertyValues.append(" " + involvedOrganizationName.toString());
|
||||
}else{
|
||||
log.warn("involvedOrganizationName is null ");
|
||||
}
|
||||
|
||||
RDFNode positionForPerson = soln.get("positionForPerson");
|
||||
if(positionForPerson != null){
|
||||
propertyValues.append(" " + positionForPerson.toString());
|
||||
}else{
|
||||
log.warn("positionForPerson is null ");
|
||||
}
|
||||
|
||||
RDFNode positionInOrganization = soln.get("positionInOrganization");
|
||||
if(positionInOrganization != null){
|
||||
propertyValues.append(" " + positionInOrganization.toString());
|
||||
}else{
|
||||
log.warn("positionInOrganization is null ");
|
||||
}
|
||||
|
||||
RDFNode titleOrRole = soln.get("titleOrRole");
|
||||
if(titleOrRole != null){
|
||||
propertyValues.append(" " + titleOrRole.toString());
|
||||
}else{
|
||||
log.warn("titleOrRole is null ");
|
||||
}
|
||||
|
||||
RDFNode positionLabel = soln.get("positionLabel");
|
||||
if(positionLabel != null){
|
||||
propertyValues.append(" " + positionLabel.toString());
|
||||
}else{
|
||||
log.warn("positionLabel is null ");
|
||||
}
|
||||
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
fullModel.leaveCriticalSection();
|
||||
}
|
||||
|
||||
return propertyValues.toString();
|
||||
}
|
||||
|
||||
public String getPropertiesAssociatedWithRelationship(String uri){
|
||||
|
||||
StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
String thisQuery = prefix +
|
||||
"SELECT (str(?Advisee) as ?advisee) (str(?DegreeCandidacy) as ?degreeCandidacy) " +
|
||||
" (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) WHERE {"
|
||||
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:Relationship . "
|
||||
|
||||
+ " OPTIONAL { ?c core:advisee ?d . ?d rdfs:label ?Advisee . } . "
|
||||
+ " OPTIONAL { ?c core:degreeCandidacy ?e . ?e rdfs:label ?DegreeCandidacy . } ."
|
||||
+ " OPTIONAL { ?c core:linkedAuthor ?f . ?f rdfs:label ?LinkedAuthor . } . "
|
||||
+ " OPTIONAL { ?c core:linkedInformationResource ?h . ?h rdfs:label ?LinkedInformationResource . } . "
|
||||
|
||||
+ " } ";
|
||||
|
||||
Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ);
|
||||
fullModel.enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding);
|
||||
try{
|
||||
|
||||
ResultSet results = qExec.execSelect();
|
||||
|
||||
while(results.hasNext()){
|
||||
|
||||
QuerySolution soln = results.nextSolution();
|
||||
|
||||
RDFNode advisee = soln.get("advisee");
|
||||
if(advisee != null){
|
||||
propertyValues.append(" " + advisee.toString());
|
||||
}else{
|
||||
log.warn("advisee is null ");
|
||||
}
|
||||
|
||||
RDFNode degreeCandidacy = soln.get("degreeCandidacy");
|
||||
if(degreeCandidacy != null){
|
||||
propertyValues.append(" " + degreeCandidacy.toString());
|
||||
}else{
|
||||
log.warn("degreeCandidacy is null ");
|
||||
}
|
||||
|
||||
RDFNode linkedAuthor = soln.get("linkedAuthor");
|
||||
if(linkedAuthor != null){
|
||||
propertyValues.append(" " + linkedAuthor.toString());
|
||||
}else{
|
||||
log.warn("linkedAuthor is null ");
|
||||
}
|
||||
|
||||
RDFNode linkedInformationResource = soln.get("linkedInformationResource");
|
||||
if(linkedInformationResource != null){
|
||||
propertyValues.append(" " + linkedInformationResource.toString());
|
||||
}else{
|
||||
log.warn("linkedInformationResource is null ");
|
||||
}
|
||||
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
fullModel.leaveCriticalSection();
|
||||
}
|
||||
|
||||
return propertyValues.toString();
|
||||
}
|
||||
|
||||
|
||||
public String getPropertiesAssociatedWithAwardReceipt(String uri){
|
||||
|
||||
StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
String thisQuery = prefix +
|
||||
"SELECT (str(?AwardConferredBy) as ?awardConferredBy) (str(?AwardOrHonorFor) as ?awardOrHonorFor) " +
|
||||
" (str(?Description) as ?description) (str(?AwardReceiptLabel) as ?awardReceiptLabel) WHERE {"
|
||||
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:AwardReceipt . "
|
||||
|
||||
+ " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy } . "
|
||||
+ " OPTIONAL { ?c core:awardOrHonorFor ?e . ?e rdfs:label ?AwardOrHonorFor } ."
|
||||
+ " OPTIONAL { ?c core:description ?Description . } . "
|
||||
+ " OPTIONAL { ?c rdfs:label ?AwardReceiptLabel . } . "
|
||||
|
||||
+ " } ORDER BY ?AwardReceiptLabel";
|
||||
|
||||
Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ);
|
||||
fullModel.enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding);
|
||||
try{
|
||||
|
||||
ResultSet results = qExec.execSelect();
|
||||
|
||||
while(results.hasNext()){
|
||||
|
||||
QuerySolution soln = results.nextSolution();
|
||||
|
||||
RDFNode awardConferredBy = soln.get("awardConferredBy");
|
||||
if(awardConferredBy != null){
|
||||
propertyValues.append(" " + awardConferredBy.toString());
|
||||
}else{
|
||||
log.warn("awardConferredBy is null ");
|
||||
}
|
||||
|
||||
RDFNode awardOrHonorFor = soln.get("awardOrHonorFor");
|
||||
if(awardOrHonorFor != null){
|
||||
propertyValues.append(" " + awardOrHonorFor.toString());
|
||||
}else{
|
||||
log.warn("awardOrHonorFor is null ");
|
||||
}
|
||||
|
||||
RDFNode description = soln.get("description");
|
||||
if(description != null){
|
||||
propertyValues.append(" " + description.toString());
|
||||
}else{
|
||||
log.warn("description is null ");
|
||||
}
|
||||
|
||||
RDFNode awardReceiptLabel = soln.get("awardReceiptLabel");
|
||||
if(awardReceiptLabel != null){
|
||||
propertyValues.append(" " + awardReceiptLabel.toString());
|
||||
}else{
|
||||
log.warn("awardReceiptLabel is null ");
|
||||
}
|
||||
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
fullModel.leaveCriticalSection();
|
||||
}
|
||||
|
||||
return propertyValues.toString();
|
||||
}
|
||||
|
||||
public String getPropertiesAssociatedWithRole(String uri){
|
||||
|
||||
StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
String thisQuery = prefix +
|
||||
"SELECT DISTINCT (str(?OrganizationLabel) as ?organizationLabel) WHERE {"
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:Role ; core:roleIn ?Organization ."
|
||||
+ " ?Organization rdfs:label ?OrganizationLabel . "
|
||||
+ " } ORDER BY ?OrganizationLabel ";
|
||||
|
||||
Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ);
|
||||
fullModel.enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding);
|
||||
try{
|
||||
|
||||
ResultSet results = qExec.execSelect();
|
||||
|
||||
while(results.hasNext()){
|
||||
|
||||
QuerySolution soln = results.nextSolution();
|
||||
|
||||
RDFNode organizationLabel = soln.get("organizationLabel");
|
||||
if(organizationLabel != null){
|
||||
propertyValues.append(" " + organizationLabel.toString());
|
||||
}else{
|
||||
log.warn("organizationLabel is null ");
|
||||
}
|
||||
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
fullModel.leaveCriticalSection();
|
||||
}
|
||||
|
||||
return propertyValues.toString();
|
||||
}
|
||||
|
||||
|
||||
|
||||
public String getPropertiesAssociatedWithEducationalTraining(String uri){
|
||||
|
||||
StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
String thisQuery = prefix +
|
||||
"SELECT (str(?AcademicDegreeLabel) as ?academicDegreeLabel) (str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) "
|
||||
+ "(str(?MajorField) as ?majorField) (str(?DepartmentOrSchool) as ?departmentOrSchool) " +
|
||||
"(str(?TrainingAtOrganizationLabel) as ?trainingAtOrganizationLabel) WHERE {"
|
||||
|
||||
+ " ?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:EducationalTraining . "
|
||||
|
||||
+ "OPTIONAL { ?c core:degreeEarned ?d . ?d rdfs:label ?AcademicDegreeLabel ; core:abbreviation ?AcademicDegreeAbbreviation . } . "
|
||||
+ "OPTIONAL { ?c core:majorField ?MajorField .} ."
|
||||
+ " OPTIONAL { ?c core:departmentOrSchool ?DepartmentOrSchool . }"
|
||||
+ " OPTIONAL { ?c core:trainingAtOrganization ?e . ?e rdfs:label ?TrainingAtOrganizationLabel . } . "
|
||||
|
||||
+"}";
|
||||
|
||||
Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ);
|
||||
fullModel.enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding);
|
||||
try{
|
||||
|
||||
ResultSet results = qExec.execSelect();
|
||||
|
||||
while(results.hasNext()){
|
||||
|
||||
QuerySolution soln = results.nextSolution();
|
||||
|
||||
RDFNode academicDegreeLabel = soln.get("academicDegreeLabel");
|
||||
if(academicDegreeLabel != null){
|
||||
propertyValues.append(" " + academicDegreeLabel.toString());
|
||||
}else{
|
||||
log.warn("academicDegreeLabel is null ");
|
||||
}
|
||||
|
||||
RDFNode academicDegreeAbbreviation = soln.get("academicDegreeAbbreviation");
|
||||
if(academicDegreeAbbreviation != null){
|
||||
propertyValues.append(" " + academicDegreeAbbreviation.toString());
|
||||
}else{
|
||||
log.warn("academicDegreeAbbreviation is null ");
|
||||
}
|
||||
|
||||
RDFNode majorField = soln.get("majorField");
|
||||
if(majorField != null){
|
||||
propertyValues.append(" " + majorField.toString());
|
||||
}else{
|
||||
log.warn("majorField is null ");
|
||||
}
|
||||
|
||||
RDFNode trainingAtDepartmentOrSchool = soln.get("departmentOrSchool");
|
||||
if(trainingAtDepartmentOrSchool != null){
|
||||
propertyValues.append(" " + trainingAtDepartmentOrSchool.toString());
|
||||
}else{
|
||||
log.warn("trainingAtDepartmentOrSchool is null ");
|
||||
}
|
||||
|
||||
RDFNode trainingAtOrganizationLabel = soln.get("trainingAtOrganizationLabel");
|
||||
if(trainingAtOrganizationLabel != null){
|
||||
propertyValues.append(" " + trainingAtOrganizationLabel.toString());
|
||||
}else{
|
||||
log.warn("trainingAtOrganizationLabel is null ");
|
||||
}
|
||||
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
fullModel.leaveCriticalSection();
|
||||
}
|
||||
return propertyValues.toString();
|
||||
|
||||
}
|
||||
|
||||
public String getPropertiesAssociatedWithInformationResource(String uri){
|
||||
|
||||
StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
String thisQuery = prefix +
|
||||
"SELECT (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) "
|
||||
+ "(str(?Editor) as ?editor) (str(?SubjectArea) as ?subjectArea) (str(?ResearchAreaOf) as ?researchAreaOf) " +
|
||||
"(str(?Features) as ?features) WHERE {"
|
||||
|
||||
+ " ?uri rdf:type core:InformationResource . "
|
||||
|
||||
+ "OPTIONAL { ?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ; core:linkedInformationResource ?d ." +
|
||||
"?b rdfs:label ?LinkedAuthor . ?d rdfs:label ?LinkedInformationResource } . "
|
||||
+ "OPTIONAL { ?uri bibo:editor ?e . ?e rdfs:label ?Editor . } ."
|
||||
+ " OPTIONAL { ?uri core:hasSubjectArea ?f . ?f rdfs:label ?SubjectArea ; core:researchAreaOf ?h . ?h rdfs:label ?ResearchAreaOf . } "
|
||||
+ " OPTIONAL { ?uri core:features ?i . ?i rdfs:label ?Features . } . "
|
||||
|
||||
+"}";
|
||||
|
||||
Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ);
|
||||
fullModel.enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding);
|
||||
try{
|
||||
|
||||
ResultSet results = qExec.execSelect();
|
||||
|
||||
while(results.hasNext()){
|
||||
|
||||
QuerySolution soln = results.nextSolution();
|
||||
|
||||
RDFNode linkedAuthor = soln.get("linkedAuthor");
|
||||
if(linkedAuthor != null){
|
||||
propertyValues.append(" " + linkedAuthor.toString());
|
||||
}else{
|
||||
log.warn("linkedAuthor is null ");
|
||||
}
|
||||
|
||||
RDFNode linkedInformationResource = soln.get("linkedInformationResource");
|
||||
if(linkedInformationResource != null){
|
||||
propertyValues.append(" " + linkedInformationResource.toString());
|
||||
}else{
|
||||
log.warn("linkedInformationResource is null ");
|
||||
}
|
||||
|
||||
RDFNode editor = soln.get("editor");
|
||||
if(editor != null){
|
||||
propertyValues.append(" " + editor.toString());
|
||||
}else{
|
||||
log.warn("editor is null ");
|
||||
}
|
||||
|
||||
RDFNode subjectArea = soln.get("subjectArea");
|
||||
if(subjectArea != null){
|
||||
propertyValues.append(" " + subjectArea.toString());
|
||||
}else{
|
||||
log.warn("subjectArea is null ");
|
||||
}
|
||||
|
||||
RDFNode researchAreaOf = soln.get("researchAreaOf");
|
||||
if(researchAreaOf != null){
|
||||
propertyValues.append(" " + researchAreaOf.toString());
|
||||
}else{
|
||||
log.warn("researchAreaOf is null ");
|
||||
}
|
||||
|
||||
RDFNode features = soln.get("features");
|
||||
if(features != null){
|
||||
propertyValues.append(" " + features.toString());
|
||||
}else{
|
||||
log.warn("features is null ");
|
||||
}
|
||||
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
fullModel.leaveCriticalSection();
|
||||
}
|
||||
return propertyValues.toString();
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -21,9 +23,11 @@ import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
|
|||
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.test.LuceneDocument;
|
||||
|
||||
/**
|
||||
* This class expect that Entities passed to it will have
|
||||
|
@ -31,6 +35,7 @@ import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
|||
* be as full as possible.
|
||||
*/
|
||||
public class Entity2LuceneDoc implements Obj2DocIface{
|
||||
|
||||
/** These are the terms for the lucene index */
|
||||
public static class VitroLuceneTermNames{
|
||||
/** Id of entity, vclass or tab */
|
||||
|
@ -61,6 +66,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
/** text for 'full text' search, this is unstemmed for
|
||||
* use with wildcards and prefix queries */
|
||||
public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
|
||||
/** class name for storing context nodes **/
|
||||
public static final String CONTEXTNODE = "contextNode";
|
||||
/** keywords */
|
||||
public static final String KEYWORDS = "KEYWORDS";
|
||||
/** Does the individual have a thumbnail image? 1=yes 0=no */
|
||||
|
@ -103,12 +110,19 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
private ProhibitedFromSearch classesProhibitedFromSearch;
|
||||
|
||||
private IndividualProhibitedFromSearch individualProhibited;
|
||||
|
||||
private ContextNodesInclusionFactory contextNodesInclusionFactory;
|
||||
|
||||
private static HashMap<String, String> IndividualURIToObjectProperties = new HashMap<String, String>();
|
||||
|
||||
private static HashSet<String> objectProperties = new HashSet<String>();
|
||||
|
||||
public Entity2LuceneDoc(
|
||||
ProhibitedFromSearch classesProhibitedFromSearch,
|
||||
IndividualProhibitedFromSearch individualProhibited){
|
||||
IndividualProhibitedFromSearch individualProhibited, ContextNodesInclusionFactory contextNodesInclusionFactory){
|
||||
this.classesProhibitedFromSearch = classesProhibitedFromSearch;
|
||||
this.individualProhibited = individualProhibited;
|
||||
this.contextNodesInclusionFactory = contextNodesInclusionFactory;
|
||||
}
|
||||
|
||||
public boolean canTranslate(Object obj) {
|
||||
|
@ -123,7 +137,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
String value;
|
||||
Document doc = new Document();
|
||||
String classPublicNames = "";
|
||||
|
||||
LuceneDocument document = new LuceneDocument();
|
||||
|
||||
//DocId
|
||||
String id = ent.getURI();
|
||||
|
@ -166,12 +180,14 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
|
||||
Field typeField = new Field (term.RDFTYPE, clz.getURI(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||
doc.add( typeField);
|
||||
document.setRDFTYPE(clz.getURI());
|
||||
|
||||
if(clz.getLocalName() != null){
|
||||
Field classLocalName = new Field(term.CLASSLOCALNAME, clz.getLocalName(), Field.Store.YES, Field.Index.ANALYZED);
|
||||
Field classLocalNameLowerCase = new Field(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase(), Field.Store.YES, Field.Index.ANALYZED);
|
||||
doc.add(classLocalName);
|
||||
doc.add(classLocalNameLowerCase);
|
||||
document.setCLASSLOCALNAME(clz.getLocalName());
|
||||
}
|
||||
|
||||
if( clz.getName() != null )
|
||||
|
@ -183,22 +199,29 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||
// classGroupField.setBoost(FIELD_BOOST);
|
||||
doc.add(classGroupField);
|
||||
document.setCLASSGROUP_URI(clz.getGroupURI());
|
||||
}
|
||||
}
|
||||
}
|
||||
doc.add( new Field(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0",
|
||||
Field.Store.NO,Field.Index.NOT_ANALYZED_NO_NORMS) );
|
||||
document.setPROHIBITED_FROM_TEXT_RESULTS(prohibited?"1":"0");
|
||||
|
||||
/* lucene DOCID */
|
||||
doc.add( new Field(term.DOCID, entClassName + id,
|
||||
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||
|
||||
document.setDOCID(entClassName + id);
|
||||
|
||||
|
||||
//vitro Id
|
||||
doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||
document.setURI(id);
|
||||
|
||||
|
||||
//java class
|
||||
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||
|
||||
document.setJCLASS(entClassName);
|
||||
|
||||
// Individual label
|
||||
if( ent.getRdfsLabel() != null )
|
||||
value=ent.getRdfsLabel();
|
||||
|
@ -208,10 +231,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
log.debug("Using local name for individual with rdfs:label " + ent.getURI());
|
||||
value = ent.getLocalName();
|
||||
}
|
||||
|
||||
Field nameRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||
nameRaw.setBoost(NAME_BOOST);
|
||||
doc.add(nameRaw);
|
||||
document.setNAME(value);
|
||||
|
||||
// RY Not sure if we need to store this. For Solr, see schema.xml field definition.
|
||||
Field nameLowerCase = new Field(term.NAME_LOWERCASE, value.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||
|
@ -224,14 +247,30 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
|
||||
Field nameStemmed = new Field(term.AC_NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
|
||||
nameStemmed.setBoost(NAME_BOOST);
|
||||
doc.add(nameStemmed);
|
||||
doc.add(nameStemmed);
|
||||
|
||||
String contextNodePropertyValues;
|
||||
|
||||
// if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
|
||||
contextNodePropertyValues = contextNodesInclusionFactory.getPropertiesAssociatedWithEducationalTraining(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRole(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithPosition(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRelationship(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithAwardReceipt(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithInformationResource(ent.getURI());
|
||||
|
||||
// }
|
||||
|
||||
Field contextNodeInformation = new Field(term.CONTEXTNODE, contextNodePropertyValues, Field.Store.YES, Field.Index.ANALYZED );
|
||||
doc.add(contextNodeInformation);
|
||||
document.setCONTEXTNODE(contextNodePropertyValues);
|
||||
|
||||
//Moniker
|
||||
|
||||
if(ent.getMoniker() != null){
|
||||
Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||
doc.add(moniker);
|
||||
document.setMONIKER(ent.getMoniker());
|
||||
}
|
||||
|
||||
//boost for entity
|
||||
|
@ -274,31 +313,45 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
Object anon[] = { new Long((new DateTime() ).getMillis()) };
|
||||
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
|
||||
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||
document.setINDEXEDTIME(String.format("%019d", anon));
|
||||
|
||||
|
||||
if( ! prohibited ){
|
||||
//ALLTEXT, all of the 'full text'
|
||||
StringBuffer alltext = new StringBuffer();
|
||||
|
||||
String t=null;
|
||||
value ="";
|
||||
value+= " "+( ((t=ent.getName()) == null)?"":t );
|
||||
value+= " "+( ((t=ent.getAnchor()) == null)?"":t);
|
||||
value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t );
|
||||
value+= " "+ ( ((t=ent.getDescription()) == null)?"":t );
|
||||
value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t );
|
||||
value+= " "+ getKeyterms(ent);
|
||||
value ="";
|
||||
value+= " "+( ((t=ent.getName()) == null)?"":t );
|
||||
alltext.append("\t NAME: " + ( ((t=ent.getName()) == null)?"":t ));
|
||||
value+= " "+( ((t=ent.getAnchor()) == null)?"":t);
|
||||
alltext.append("\t ANCHOR: " + ( ((t=ent.getAnchor()) == null)?"":t));
|
||||
value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t );
|
||||
alltext.append("\t MONIKER: " + ( ((t=ent.getMoniker()) == null)?"":t ));
|
||||
value+= " "+ ( ((t=ent.getDescription()) == null)?"":t );
|
||||
alltext.append("\t DESCRIPTION: " + ( ((t=ent.getDescription()) == null)?"":t ));
|
||||
value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t );
|
||||
alltext.append("\t BLURB: " + ( ((t=ent.getBlurb()) == null)?"":t ));
|
||||
value+= " "+ getKeyterms(ent);
|
||||
alltext.append("\t KEYTERMS: " + getKeyterms(ent));
|
||||
|
||||
value+= " " + classPublicNames;
|
||||
value+= " " + classPublicNames;
|
||||
alltext.append(" CLASSPUBLICNAMES: " + classPublicNames);
|
||||
|
||||
List<DataPropertyStatement> dataPropertyStatements = ent.getDataPropertyStatements();
|
||||
if (dataPropertyStatements != null) {
|
||||
alltext.append("\n DATA_PROPERTY_STATEMENTS \n -------------------------------- \n");
|
||||
Iterator<DataPropertyStatement> dataPropertyStmtIter = dataPropertyStatements.iterator();
|
||||
while (dataPropertyStmtIter.hasNext()) {
|
||||
DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next();
|
||||
value+= " "+ ( ((t=dataPropertyStmt.getData()) == null)?"":t );
|
||||
alltext.append("\n " + ( ((t=dataPropertyStmt.getData()) == null)?"":t ));
|
||||
}
|
||||
}
|
||||
|
||||
List<ObjectPropertyStatement> objectPropertyStatements = ent.getObjectPropertyStatements();
|
||||
if (objectPropertyStatements != null) {
|
||||
alltext.append("\n OBJECT_PROPERTY_STATEMENTS \n -------------------------------- \n");
|
||||
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
|
||||
while (objectPropertyStmtIter.hasNext()) {
|
||||
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
|
||||
|
@ -306,17 +359,31 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
continue;
|
||||
try {
|
||||
value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t );
|
||||
alltext.append("\n " +( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t )
|
||||
+ " : " + ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t ));
|
||||
|
||||
if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
|
||||
//IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) );
|
||||
objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ));
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
log.debug("could not index name of related object: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
//stemmed terms
|
||||
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
|
||||
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
|
||||
//unstemmed terms
|
||||
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
|
||||
document.setALLTEXT(alltext.toString());
|
||||
}
|
||||
|
||||
document.writeToLog();
|
||||
|
||||
// log.info("\n IndividualURItoObjectProperties " + IndividualURIToObjectProperties.toString() + " \n\n");
|
||||
log.info(" \n Object Properties " + objectProperties.toString() + "\n\n");
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
|
|||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_STEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_UNSTEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CONTEXTNODE;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
@ -41,6 +42,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
|||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||
import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
|
||||
|
||||
|
@ -111,10 +113,16 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
getAnalyzer());
|
||||
context.setAttribute(ANALYZER, getAnalyzer());
|
||||
|
||||
//bk392 adding another argument to Entity2LuceneDoc
|
||||
// that takes care of sparql queries for context nodes.
|
||||
|
||||
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
||||
Entity2LuceneDoc translator = new Entity2LuceneDoc(
|
||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||
new IndividualProhibitedFromSearch(context) );
|
||||
new IndividualProhibitedFromSearch(context),
|
||||
new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context)
|
||||
|
||||
);
|
||||
indexer.addObj2Doc(translator);
|
||||
|
||||
context.setAttribute(LuceneIndexer.class.getName(), indexer);
|
||||
|
@ -250,9 +258,11 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
analyzer.addAnalyzer(AC_NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
|
||||
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(CONTEXTNODE, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
|
||||
analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer());
|
||||
|
||||
|
||||
return analyzer;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
@ -27,91 +27,93 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
|
|||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||
|
||||
/**
|
||||
* Setup objects for lucene searching and indexing.
|
||||
*
|
||||
* The indexing and search objects, IndexBuilder and Searcher are found by the
|
||||
* controllers IndexController and SearchController through the servletContext.
|
||||
* This object will have the method contextInitialized() called when the tomcat
|
||||
* server starts this webapp.
|
||||
*
|
||||
* The contextInitialized() will try to find the lucene index directory,
|
||||
* make a LueceneIndexer and a LuceneSearcher. The LuceneIndexer will
|
||||
* also get a list of Obj2Doc objects so it can translate object to lucene docs.
|
||||
*
|
||||
* To execute this at context creation put this in web.xml:
|
||||
<listener>
|
||||
<listener-class>
|
||||
edu.cornell.mannlib.vitro.search.setup.LuceneSetup
|
||||
</listener-class>
|
||||
</listener>
|
||||
|
||||
* @author bdc34
|
||||
*
|
||||
*/
|
||||
public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
|
||||
|
||||
/**
|
||||
* Setup objects for lucene searching and indexing.
|
||||
*
|
||||
* The indexing and search objects, IndexBuilder and Searcher are found by the
|
||||
* controllers IndexController and SearchController through the servletContext.
|
||||
* This object will have the method contextInitialized() called when the tomcat
|
||||
* server starts this webapp.
|
||||
*
|
||||
* The contextInitialized() will try to find the lucene index directory,
|
||||
* make a LuceneIndexer and a LuceneSearcher. The LuceneIndexer will
|
||||
* also get a list of Obj2Doc objects so it can translate object to lucene docs.
|
||||
*
|
||||
* To execute this at context creation put this in web.xml:
|
||||
<listener>
|
||||
<listener-class>
|
||||
edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetupCJK
|
||||
</listener-class>
|
||||
</listener>
|
||||
|
||||
* @author bdc34
|
||||
*
|
||||
*/
|
||||
public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
|
||||
private static String indexDir = null;
|
||||
private static final Log log = LogFactory.getLog(LuceneSetupCJK.class.getName());
|
||||
private static final String PROPERTY_VITRO_HOME = "vitro.home.directory";
|
||||
private static final Log log = LogFactory.getLog(LuceneSetupCJK.class.getName());
|
||||
private static final String PROPERTY_VITRO_HOME = "vitro.home.directory";
|
||||
private static final String LUCENE_SUBDIRECTORY_NAME = "luceneIndex";
|
||||
|
||||
/**
|
||||
* Gets run to set up DataSource when the webapp servlet context gets created.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Gets run to set up the Lucene indexing objects when the webapp servlet context gets created.
|
||||
*/
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public void contextInitialized(ServletContextEvent sce) {
|
||||
ServletContext context = sce.getServletContext();
|
||||
log.info("**** Running "+this.getClass().getName()+".contextInitialized()");
|
||||
try{
|
||||
indexDir = getIndexDirName(sce);
|
||||
log.info("Lucene indexDir: " + indexDir);
|
||||
|
||||
setBoolMax();
|
||||
|
||||
HashSet dataPropertyBlacklist = new HashSet<String>();
|
||||
context.setAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist);
|
||||
|
||||
HashSet objectPropertyBlacklist = new HashSet<String>();
|
||||
objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom");
|
||||
context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist);
|
||||
@SuppressWarnings("unchecked")
|
||||
public void contextInitialized(ServletContextEvent sce) {
|
||||
ServletContext context = sce.getServletContext();
|
||||
log.info("**** Running "+this.getClass().getName()+".contextInitialized()");
|
||||
try{
|
||||
indexDir = getIndexDirName(sce);
|
||||
log.info("Lucene indexDir: " + indexDir);
|
||||
|
||||
setBoolMax();
|
||||
|
||||
HashSet dataPropertyBlacklist = new HashSet<String>();
|
||||
context.setAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist);
|
||||
|
||||
HashSet objectPropertyBlacklist = new HashSet<String>();
|
||||
objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom");
|
||||
context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist);
|
||||
|
||||
//This is where to get a LuceneIndex from. The indexer will
|
||||
//need to reference this to notify it of updates to the index
|
||||
LuceneIndexFactory lif = LuceneIndexFactory.setup(context, indexDir);
|
||||
String liveIndexDir = lif.getLiveIndexDir(context);
|
||||
|
||||
//here we want to put the LuceneIndex object into the application scope
|
||||
LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer());
|
||||
|
||||
//here we want to put the LuceneIndex object into the application scope
|
||||
LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer());
|
||||
context.setAttribute(LuceneSetup.ANALYZER, getAnalyzer());
|
||||
|
||||
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
||||
Entity2LuceneDoc translator = new Entity2LuceneDoc(
|
||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||
new IndividualProhibitedFromSearch(context) );
|
||||
new IndividualProhibitedFromSearch(context),
|
||||
new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context));
|
||||
indexer.addObj2Doc(translator);
|
||||
|
||||
indexer.setLuceneIndexFactory(lif);
|
||||
|
||||
//This is where the builder gets the list of places to try to
|
||||
//get objects to index. It is filtered so that non-public text
|
||||
//does not get into the search index.
|
||||
WebappDaoFactory wadf =
|
||||
(WebappDaoFactory) context.getAttribute("webappDaoFactory");
|
||||
|
||||
//This is where the builder gets the list of places to try to
|
||||
//get objects to index. It is filtered so that non-public text
|
||||
//does not get into the search index.
|
||||
WebappDaoFactory wadf =
|
||||
(WebappDaoFactory) context.getAttribute("webappDaoFactory");
|
||||
VitroFilters vf = VitroFilterUtils.getPublicFilter(context);
|
||||
wadf = new WebappDaoFactoryFiltering(wadf,vf);
|
||||
|
||||
List sources = new ArrayList();
|
||||
sources.add(wadf.getIndividualDao());
|
||||
|
||||
IndexBuilder builder = new IndexBuilder(context,indexer,sources);
|
||||
|
||||
// here we add the IndexBuilder with the LuceneIndexer
|
||||
// to the servlet context so we can access it later in the webapp.
|
||||
wadf = new WebappDaoFactoryFiltering(wadf,vf);
|
||||
|
||||
List sources = new ArrayList();
|
||||
sources.add(wadf.getIndividualDao());
|
||||
|
||||
IndexBuilder builder = new IndexBuilder(context,indexer,sources);
|
||||
|
||||
// here we add the IndexBuilder with the LuceneIndexer
|
||||
// to the servlet context so we can access it later in the webapp.
|
||||
context.setAttribute(IndexBuilder.class.getName(),builder);
|
||||
|
||||
//set up listeners so search index builder is notified of changes to model
|
||||
|
@ -119,36 +121,36 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
|
|||
OntModel jenaOntModel = (OntModel)sce.getServletContext().getAttribute("jenaOntModel");
|
||||
SearchReindexingListener srl = new SearchReindexingListener( builder );
|
||||
ModelContext.registerListenerForChanges(sce.getServletContext(), srl);
|
||||
|
||||
}catch(Exception ex){
|
||||
log.error("Could not setup lucene full text search." , ex);
|
||||
}
|
||||
|
||||
log.debug("**** End of "+this.getClass().getName()+".contextInitialized()");
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets run when the webApp Context gets destroyed.
|
||||
*/
|
||||
|
||||
}catch(Exception ex){
|
||||
log.error("Could not setup lucene full text search." , ex);
|
||||
}
|
||||
|
||||
log.debug("**** End of "+this.getClass().getName()+".contextInitialized()");
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets run when the webApp Context gets destroyed.
|
||||
*/
|
||||
@Override
|
||||
public void contextDestroyed(ServletContextEvent sce) {
|
||||
|
||||
|
||||
log.info("**** Running "+this.getClass().getName()+".contextDestroyed()");
|
||||
IndexBuilder builder = (IndexBuilder)sce.getServletContext().getAttribute(IndexBuilder.class.getName());
|
||||
builder.stopIndexingThread();
|
||||
}
|
||||
|
||||
/**
|
||||
* In wild card searches the query is first broken into many boolean searches
|
||||
* OR'ed together. So if there is a query that would match a lot of records
|
||||
* we need a high max boolean limit for the lucene search.
|
||||
*
|
||||
* This sets some static method in the lucene library to achieve this.
|
||||
*/
|
||||
public static void setBoolMax() {
|
||||
BooleanQuery.setMaxClauseCount(16384);
|
||||
}
|
||||
|
||||
builder.stopIndexingThread();
|
||||
}
|
||||
|
||||
/**
|
||||
* In wild card searches the query is first broken into many boolean searches
|
||||
* OR'ed together. So if there is a query that would match a lot of records
|
||||
* we need a high max boolean limit for the lucene search.
|
||||
*
|
||||
* This sets some static method in the lucene library to achieve this.
|
||||
*/
|
||||
public static void setBoolMax() {
|
||||
BooleanQuery.setMaxClauseCount(16384);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the name of the directory to store the lucene index in. The
|
||||
* {@link ConfigurationProperties} should have a property named
|
||||
|
@ -190,14 +192,14 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
|
|||
return luceneDir.getPath();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the analyzer that will be used when building the indexing
|
||||
* and when analyzing the incoming search terms.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private Analyzer getAnalyzer() {
|
||||
return new CJKAnalyzer();
|
||||
}
|
||||
|
||||
}
|
||||
/**
|
||||
* Gets the analyzer that will be used when building the indexing
|
||||
* and when analyzing the incoming search terms.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private Analyzer getAnalyzer() {
|
||||
return new CJKAnalyzer();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,209 @@
|
|||
package edu.cornell.mannlib.vitro.webapp.search.lucene.test;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
public class LuceneDocument {
|
||||
|
||||
private static final Log log = LogFactory.getLog(LuceneDocument.class.getName());
|
||||
|
||||
String URI;
|
||||
String DOCID;
|
||||
String JCLASS;
|
||||
String RDFTYPE;
|
||||
String CLASSGROUP_URI;
|
||||
String MODTIME;
|
||||
String NAME;
|
||||
String PORTAL;
|
||||
String INDEXEDTIME;
|
||||
String TIMEKEY;
|
||||
String SUNSET;
|
||||
String MONIKER;
|
||||
String ALLTEXT;
|
||||
String KEYWORDS;
|
||||
String THUMBNAIL;
|
||||
String PROHIBITED_FROM_TEXT_RESULTS;
|
||||
String CLASSLOCALNAME;
|
||||
String CONTEXTNODE;
|
||||
|
||||
static final String FILE = "~/Desktop/LuceneIndividuals.txt";
|
||||
|
||||
|
||||
public String getURI() {
|
||||
return URI;
|
||||
}
|
||||
|
||||
public void setURI(String uRI) {
|
||||
URI = uRI;
|
||||
}
|
||||
|
||||
public String getDOCID() {
|
||||
return DOCID;
|
||||
}
|
||||
|
||||
public void setDOCID(String dOCID) {
|
||||
DOCID = dOCID;
|
||||
}
|
||||
|
||||
public String getJCLASS() {
|
||||
return JCLASS;
|
||||
}
|
||||
|
||||
public void setJCLASS(String jCLASS) {
|
||||
JCLASS = jCLASS;
|
||||
}
|
||||
|
||||
public String getRDFTYPE() {
|
||||
return RDFTYPE;
|
||||
}
|
||||
|
||||
public void setRDFTYPE(String rDFTYPE) {
|
||||
RDFTYPE = rDFTYPE;
|
||||
}
|
||||
|
||||
public String getCLASSGROUP_URI() {
|
||||
return CLASSGROUP_URI;
|
||||
}
|
||||
|
||||
public void setCLASSGROUP_URI(String cLASSGROUP_URI) {
|
||||
CLASSGROUP_URI = cLASSGROUP_URI;
|
||||
}
|
||||
|
||||
public String getMODTIME() {
|
||||
return MODTIME;
|
||||
}
|
||||
|
||||
public void setMODTIME(String mODTIME) {
|
||||
MODTIME = mODTIME;
|
||||
}
|
||||
|
||||
public String getNAME() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
public void setNAME(String nAME) {
|
||||
NAME = nAME;
|
||||
}
|
||||
|
||||
public String getPORTAL() {
|
||||
return PORTAL;
|
||||
}
|
||||
|
||||
public void setPORTAL(String pORTAL) {
|
||||
PORTAL = pORTAL;
|
||||
}
|
||||
|
||||
public String getINDEXEDTIME() {
|
||||
return INDEXEDTIME;
|
||||
}
|
||||
|
||||
public void setINDEXEDTIME(String iNDEXEDTIME) {
|
||||
INDEXEDTIME = iNDEXEDTIME;
|
||||
}
|
||||
|
||||
public String getTIMEKEY() {
|
||||
return TIMEKEY;
|
||||
}
|
||||
|
||||
public void setTIMEKEY(String tIMEKEY) {
|
||||
TIMEKEY = tIMEKEY;
|
||||
}
|
||||
|
||||
public String getSUNSET() {
|
||||
return SUNSET;
|
||||
}
|
||||
|
||||
public void setSUNSET(String sUNSET) {
|
||||
SUNSET = sUNSET;
|
||||
}
|
||||
|
||||
public String getMONIKER() {
|
||||
return MONIKER;
|
||||
}
|
||||
|
||||
public void setMONIKER(String mONIKER) {
|
||||
MONIKER = mONIKER;
|
||||
}
|
||||
|
||||
public String getALLTEXT() {
|
||||
return ALLTEXT;
|
||||
}
|
||||
|
||||
public void setALLTEXT(String aLLTEXT) {
|
||||
ALLTEXT = aLLTEXT;
|
||||
}
|
||||
|
||||
public String getKEYWORDS() {
|
||||
return KEYWORDS;
|
||||
}
|
||||
|
||||
public void setKEYWORDS(String kEYWORDS) {
|
||||
KEYWORDS = kEYWORDS;
|
||||
}
|
||||
|
||||
public String getTHUMBNAIL() {
|
||||
return THUMBNAIL;
|
||||
}
|
||||
|
||||
public void setTHUMBNAIL(String tHUMBNAIL) {
|
||||
THUMBNAIL = tHUMBNAIL;
|
||||
}
|
||||
|
||||
public String getPROHIBITED_FROM_TEXT_RESULTS() {
|
||||
return PROHIBITED_FROM_TEXT_RESULTS;
|
||||
}
|
||||
|
||||
public void setPROHIBITED_FROM_TEXT_RESULTS(String pROHIBITED_FROM_TEXT_RESULTS) {
|
||||
PROHIBITED_FROM_TEXT_RESULTS = pROHIBITED_FROM_TEXT_RESULTS;
|
||||
}
|
||||
|
||||
public String getCLASSLOCALNAME() {
|
||||
return CLASSLOCALNAME;
|
||||
}
|
||||
|
||||
public void setCLASSLOCALNAME(String cLASSLOCALNAME) {
|
||||
CLASSLOCALNAME = cLASSLOCALNAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(){
|
||||
|
||||
StringBuffer result = new StringBuffer();
|
||||
|
||||
result.append("\n==================================\n");
|
||||
|
||||
result.append("URI : " + URI);
|
||||
result.append("\nDOCID : " + DOCID);
|
||||
result.append("\nJCLASS : " + JCLASS);
|
||||
result.append("\nRDFTYPE : " + RDFTYPE);
|
||||
result.append("\nCLASSGROUP_URI : " + CLASSGROUP_URI);
|
||||
result.append("\nMODTIME : " + MODTIME);
|
||||
result.append("\nNAME : " + NAME);
|
||||
result.append("\nPORTAL : " + PORTAL);
|
||||
result.append("\nINDEXEDTIME : " + INDEXEDTIME);
|
||||
result.append("\nCONTEXTNODE : " + CONTEXTNODE);
|
||||
result.append("\nTIMEKEY : " + TIMEKEY);
|
||||
result.append("\nSUNSET : " + SUNSET);
|
||||
result.append("\nMONIKER : " + MONIKER);
|
||||
result.append("\nALLTEXT : " + ALLTEXT);
|
||||
result.append("\nKEYWORDS : " + KEYWORDS);
|
||||
result.append("\nTHUMBNAIL : " + THUMBNAIL);
|
||||
result.append("\nPROHIBITED_FROM_TEXT_RESULTS : " + PROHIBITED_FROM_TEXT_RESULTS);
|
||||
result.append("\nCLASSLOCALNAME : " + CLASSLOCALNAME);
|
||||
|
||||
return result.toString();
|
||||
|
||||
}
|
||||
|
||||
public void writeToLog(){
|
||||
log.info(this.toString());
|
||||
}
|
||||
|
||||
public void setCONTEXTNODE(String contextNodePropertyValues) {
|
||||
this.CONTEXTNODE = contextNodePropertyValues;
|
||||
}
|
||||
}
|
|
@ -2,23 +2,261 @@
|
|||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.solr.client.solrj.beans.Field;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.joda.time.DateTime;
|
||||
|
||||
import com.hp.hpl.jena.vocabulary.OWL;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
||||
|
||||
public class IndividualToSolrDocument implements Obj2DocIface {
|
||||
|
||||
protected LuceneDocToSolrDoc luceneToSolr;
|
||||
protected Entity2LuceneDoc entityToLucene;
|
||||
|
||||
public IndividualToSolrDocument(Entity2LuceneDoc e2d){
|
||||
entityToLucene = e2d;
|
||||
luceneToSolr = new LuceneDocToSolrDoc();
|
||||
public static final Log log = LogFactory.getLog(IndividualToSolrDocument.class.getName());
|
||||
|
||||
public static VitroTermNames term = new VitroTermNames();
|
||||
|
||||
private static String entClassName = Individual.class.getName();
|
||||
|
||||
private ProhibitedFromSearch classesProhibitedFromSearch;
|
||||
|
||||
private IndividualProhibitedFromSearch individualProhibitedFromSearch;
|
||||
|
||||
private ContextNodesInclusionFactory contextNodesInclusionFactory;
|
||||
|
||||
private static HashSet<String> objectProperties = new HashSet<String>();
|
||||
|
||||
|
||||
/**
 * Creates a translator that builds Solr input documents directly from
 * individuals.
 *
 * @param classesProhibitedFromSearch asked, per class URI, whether a type
 *        keeps the individual out of the full-text fields
 * @param individualProhibitedFromSearch asked, per individual URI, whether
 *        the individual is left out of the index altogether
 * @param contextNodesInclusionFactory source of the context-node property
 *        text that is added to the document
 */
public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch,
        IndividualProhibitedFromSearch individualProhibitedFromSearch,
        ContextNodesInclusionFactory contextNodesInclusionFactory) {
    // Plain field wiring; the three collaborators are independent of each other.
    this.contextNodesInclusionFactory = contextNodesInclusionFactory;
    this.individualProhibitedFromSearch = individualProhibitedFromSearch;
    this.classesProhibitedFromSearch = classesProhibitedFromSearch;
}
|
||||
|
||||
@Override
|
||||
public Object translate(Object obj) throws IndexingException{
|
||||
long tProhibited = System.currentTimeMillis();
|
||||
|
||||
if(!(obj instanceof Individual))
|
||||
return null;
|
||||
|
||||
Individual ent = (Individual)obj;
|
||||
String value;
|
||||
String classPublicNames = "";
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
|
||||
//DocId
|
||||
String id = ent.getURI();
|
||||
log.debug("translating " + id);
|
||||
|
||||
if(id == null){
|
||||
log.debug("cannot add individuals without URIs to lucene Index");
|
||||
return null;
|
||||
}else if( id.startsWith(VitroVocabulary.vitroURI) ||
|
||||
id.startsWith(VitroVocabulary.VITRO_PUBLIC) ||
|
||||
id.startsWith(VitroVocabulary.PSEUDO_BNODE_NS) ||
|
||||
id.startsWith(OWL.NS)){
|
||||
log.debug("not indexing because of namespace:" + id);
|
||||
return null;
|
||||
}
|
||||
|
||||
//filter out class groups, owl:ObjectProperties etc..
|
||||
if(individualProhibitedFromSearch.isIndividualProhibited(id)){
|
||||
return null;
|
||||
}
|
||||
|
||||
log.debug("time to check if individual is prohibited:" + Long.toString(System.currentTimeMillis() - tProhibited));
|
||||
|
||||
// Types and classgroups
|
||||
boolean prohibited = false;
|
||||
List<VClass> vclasses = ent.getVClasses(false);
|
||||
long tClassgroup = System.currentTimeMillis();
|
||||
for(VClass clz : vclasses){
|
||||
if(clz.getURI() == null){
|
||||
continue;
|
||||
}else if(OWL.Thing.getURI().equals(clz.getURI())){
|
||||
//index individuals of type owl:Thing, just don't add owl:Thing as the type field in the index
|
||||
continue;
|
||||
} else if(clz.getURI().startsWith(OWL.NS)){
|
||||
log.debug("not indexing " + id + " because of type " + clz.getURI());
|
||||
return null;
|
||||
} else {
|
||||
if( !prohibited && classesProhibitedFromSearch.isClassProhibited(clz.getURI()))
|
||||
prohibited = true;
|
||||
if( clz.getSearchBoost() != null)
|
||||
doc.setDocumentBoost(doc.getDocumentBoost() + clz.getSearchBoost());
|
||||
|
||||
doc.addField(term.RDFTYPE, clz.getURI());
|
||||
|
||||
if(clz.getLocalName() != null){
|
||||
doc.addField(term.CLASSLOCALNAME, clz.getLocalName());
|
||||
doc.addField(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase());
|
||||
}
|
||||
|
||||
if(clz.getName() != null)
|
||||
classPublicNames += clz.getName();
|
||||
|
||||
//Classgroup URI
|
||||
if(clz.getGroupURI() != null){
|
||||
doc.addField(term.CLASSGROUP_URI,clz.getGroupURI());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
log.debug("time to check if class is prohibited and adding classes, classgroups and type to the index: " + Long.toString(System.currentTimeMillis() - tClassgroup));
|
||||
|
||||
|
||||
doc.addField(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0");
|
||||
|
||||
//lucene DocID
|
||||
doc.addField(term.DOCID, entClassName + id);
|
||||
|
||||
//vitro id
|
||||
doc.addField(term.URI, id);
|
||||
|
||||
//java class
|
||||
doc.addField(term.JCLASS, entClassName);
|
||||
|
||||
//Individual Label
|
||||
if(ent.getRdfsLabel() != null)
|
||||
value = ent.getRdfsLabel();
|
||||
else{
|
||||
log.debug("Using local name for individual with rdfs:label " + ent.getURI());
|
||||
value = ent.getLocalName();
|
||||
}
|
||||
|
||||
doc.addField(term.NAME_RAW, value, NAME_BOOST);
|
||||
doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST);
|
||||
doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST);
|
||||
doc.addField(term.NAME_STEMMED, value, NAME_BOOST);
|
||||
|
||||
long tContextNodes = System.currentTimeMillis();
|
||||
|
||||
String contextNodePropertyValues = "";
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithEducationalTraining(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRole(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithPosition(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRelationship(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithAwardReceipt(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithInformationResource(ent.getURI());
|
||||
|
||||
|
||||
doc.addField(term.CONTEXTNODE, contextNodePropertyValues);
|
||||
|
||||
log.debug("time to fire contextnode queries and include them in the index: " + Long.toString(System.currentTimeMillis() - tContextNodes));
|
||||
|
||||
|
||||
long tMoniker = System.currentTimeMillis();
|
||||
|
||||
//Moniker
|
||||
if(ent.getMoniker() != null){
|
||||
doc.addField(term.MONIKER, ent.getMoniker());
|
||||
}
|
||||
|
||||
//boost for entity
|
||||
if(ent.getSearchBoost() != null && ent.getSearchBoost() != 0)
|
||||
doc.setDocumentBoost(ent.getSearchBoost());
|
||||
|
||||
//thumbnail
|
||||
try{
|
||||
value = null;
|
||||
if(ent.hasThumb())
|
||||
doc.addField(term.THUMBNAIL, "1");
|
||||
else
|
||||
doc.addField(term.THUMBNAIL, "0");
|
||||
}catch(Exception ex){
|
||||
log.debug("could not index thumbnail: " + ex);
|
||||
}
|
||||
|
||||
|
||||
//time of index in millis past epoc
|
||||
Object anon[] = { new Long((new DateTime() ).getMillis()) };
|
||||
doc.addField(term.INDEXEDTIME, String.format("%019d", anon));
|
||||
|
||||
log.debug("time to include moniker , thumbnail and indexedtime in the index: " + Long.toString(System.currentTimeMillis() - tMoniker));
|
||||
|
||||
long tPropertyStatements = System.currentTimeMillis();
|
||||
if(!prohibited){
|
||||
//ALLTEXT, all of the 'full text'
|
||||
String t=null;
|
||||
value ="";
|
||||
value+= " "+( ((t=ent.getName()) == null)?"":t );
|
||||
value+= " "+( ((t=ent.getAnchor()) == null)?"":t);
|
||||
value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t );
|
||||
value+= " "+ ( ((t=ent.getDescription()) == null)?"":t );
|
||||
value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t );
|
||||
|
||||
value+= " " + classPublicNames;
|
||||
|
||||
List<DataPropertyStatement> dataPropertyStatements = ent.getDataPropertyStatements();
|
||||
if (dataPropertyStatements != null) {
|
||||
Iterator<DataPropertyStatement> dataPropertyStmtIter = dataPropertyStatements.iterator();
|
||||
while (dataPropertyStmtIter.hasNext()) {
|
||||
DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next();
|
||||
value+= " "+ ( ((t=dataPropertyStmt.getData()) == null)?"":t );
|
||||
}
|
||||
}
|
||||
|
||||
List<ObjectPropertyStatement> objectPropertyStatements = ent.getObjectPropertyStatements();
|
||||
if (objectPropertyStatements != null) {
|
||||
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
|
||||
while (objectPropertyStmtIter.hasNext()) {
|
||||
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
|
||||
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
|
||||
continue;
|
||||
try {
|
||||
value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t );
|
||||
if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
|
||||
//IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) );
|
||||
objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ));
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
log.debug("could not index name of related object: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements));
|
||||
|
||||
doc.addField(term.ALLTEXT, value);
|
||||
doc.addField(term.ALLTEXTUNSTEMMED, value);
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
// public IndividualToSolrDocument(Entity2LuceneDoc e2d){
|
||||
//// entityToLucene = e2d;
|
||||
// luceneToSolr = new LuceneDocToSolrDoc();
|
||||
// }
|
||||
|
||||
@Override
|
||||
public boolean canTranslate(Object obj) {
|
||||
return obj != null && obj instanceof Individual;
|
||||
|
@ -34,14 +272,23 @@ public class IndividualToSolrDocument implements Obj2DocIface {
|
|||
throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object translate(Object obj) throws IndexingException {
|
||||
return luceneToSolr.translate( entityToLucene.translate( obj ) );
|
||||
}
|
||||
// @Override
|
||||
// public Object translate(Object obj) throws IndexingException {
|
||||
// return luceneToSolr.translate( entityToLucene.translate( obj ) );
|
||||
// }
|
||||
|
||||
@Override
|
||||
public Object unTranslate(Object result) {
|
||||
return luceneToSolr.unTranslate( result );
|
||||
Individual ent = null;
|
||||
if( result != null && result instanceof Document){
|
||||
Document hit = (Document) result;
|
||||
String id = hit.get(term.URI);
|
||||
ent = new IndividualImpl();
|
||||
ent.setURI(id);
|
||||
}
|
||||
return ent;
|
||||
}
|
||||
|
||||
public static float NAME_BOOST = 3.0F;
|
||||
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
|
|||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
|
@ -67,10 +68,15 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
|
|||
/* setup the individual to solr doc translation */
|
||||
//first we need a ent2luceneDoc translator
|
||||
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
||||
Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc(
|
||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||
new IndividualProhibitedFromSearch(context) );
|
||||
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc );
|
||||
// Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc(
|
||||
// new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||
// new IndividualProhibitedFromSearch(context),
|
||||
// new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context));
|
||||
// IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc );
|
||||
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument(
|
||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||
new IndividualProhibitedFromSearch(context),
|
||||
new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context));
|
||||
List<Obj2DocIface> o2d = new ArrayList<Obj2DocIface>();
|
||||
o2d.add(indToSolrDoc);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue