Committing changes with merge from search relevance branch.
This commit is contained in:
commit
692446335e
31 changed files with 48715 additions and 2514 deletions
|
@ -222,9 +222,9 @@
|
|||
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<!-- in this example, we will only use synonyms at query time
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<!-- in this example, we will only use synonyms at query time -->
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="true"/>
|
||||
|
||||
<!-- Case insensitive stop word removal.
|
||||
add enablePositionIncrements=true in both the index and query
|
||||
analyzers to leave a 'gap' for more accurate phrase queries.
|
||||
|
@ -237,12 +237,12 @@
|
|||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<!-- <filter class="solr.PorterStemFilterFactory"/> -->
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
|
@ -251,6 +251,7 @@
|
|||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
@ -381,7 +382,7 @@
|
|||
<fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
||||
<filter class="solr.PhoneticFilterFactory" encoder="Metaphone" inject="false"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
|
@ -470,39 +471,53 @@
|
|||
when adding a document.
|
||||
-->
|
||||
|
||||
<!-- **************************** Vitro Fields *************************** -->
|
||||
<!-- **************************** Vitro Fields *************************** -->
|
||||
|
||||
<field name="DocId" type="string" indexed="true" stored="true" required="true" omitNorms="true"/>
|
||||
|
||||
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
||||
|
||||
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" omitNorms="true" multiValued="true"/>
|
||||
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
||||
|
||||
<field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/>
|
||||
<!-- RY Not sure if we need to store nameLowercase -->
|
||||
<field name="nameLowercase" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<!-- A sortable version of nameLowercase -->
|
||||
<field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" />
|
||||
<field name="nameUnstemmed" type="lowercase" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<field name="DocId" type="string" indexed="true" stored="true" required="true" />
|
||||
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
||||
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
|
||||
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
||||
<field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/>
|
||||
<!-- RY Not sure if we need to store nameLowercase. Is it ever displayed? -->
|
||||
<field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/>
|
||||
<!-- A sortable version of nameLowercase -->
|
||||
<field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" />
|
||||
<field name="nameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<!--
|
||||
<field name="prefixNameUnstemmed" type="prefixTextUnstemmed" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="prefixNameStemmed" type="prefixText" indexed="true" stored="false" multiValued="true"/>
|
||||
-->
|
||||
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
|
||||
|
||||
<field name="moniker" type="ignored" />
|
||||
<field name="modType" type="ignored"/>
|
||||
<field name="JCLASS" type="ignored"/>
|
||||
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="NAME_PHONETIC" type ="phonetic" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="ALLTEXT_PHONETIC" type="phonetic" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
|
||||
|
||||
<field name="targetInfo" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<!-- <field name="BETA" type="float" indexed="true" stored="true" multiValued="false"/>
|
||||
<field name="PHI" type="float" indexed="true" stored="true" multiValued="false"/>
|
||||
<field name="ADJACENT_NODES" type="string" indexed="true" stored="true" multiValued="true"/> -->
|
||||
|
||||
<field name="moniker" type="ignored" />
|
||||
<field name="modType" type="ignored"/>
|
||||
<field name="JCLASS" type="ignored"/>
|
||||
|
||||
<!-- Copy nameLowercase to sortable field. -->
|
||||
<copyField source="nameLowercase" dest="nameLowercaseSingleValued" />
|
||||
|
||||
<!-- **************************** End Vitro Fields *************************** -->
|
||||
<!-- **************************** End Vitro Fields *************************** -->
|
||||
|
||||
|
||||
<!-- catchall field, containing all other searchable text fields (implemented
|
||||
|
@ -571,7 +586,7 @@
|
|||
<uniqueKey>DocId</uniqueKey>
|
||||
|
||||
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
|
||||
<defaultSearchField>ALLTEXT</defaultSearchField>
|
||||
<!-- <defaultSearchField>ALLTEXT</defaultSearchField> -->
|
||||
|
||||
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
||||
<solrQueryParser defaultOperator="AND"/>
|
||||
|
@ -594,7 +609,8 @@
|
|||
<!-- Similarity is the scoring routine for each document vs. a query.
|
||||
A custom similarity may be specified here, but the default is fine
|
||||
for most applications. -->
|
||||
<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
|
||||
<similarity class="org.apache.lucene.search.DefaultSimilarity"/>
|
||||
<!-- <similarity class="edu.cornell.mannlib.vitro.webapp.search.lucene.CustomSimilarity"/> -->
|
||||
<!-- ... OR ...
|
||||
Specify a SimilarityFactory class name implementation
|
||||
allowing parameters to be used.
|
||||
|
|
|
@ -385,6 +385,7 @@
|
|||
be based on the last SolrCore to be initialized.
|
||||
|
||||
-->
|
||||
-->
|
||||
<!-- Increasing to handle large wildcard queries used in IndividualListController.
|
||||
See VIVO-384. -->
|
||||
<maxBooleanClauses>50000</maxBooleanClauses>
|
||||
|
@ -706,8 +707,15 @@
|
|||
will be overridden by parameters in the request
|
||||
-->
|
||||
<lst name="defaults">
|
||||
<str name="defType">edismax</str>
|
||||
<str name="qf">nameRaw nameLowercase ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC nameUnstemmed nameStemmed targetInfo</str>
|
||||
<str name="pf">targetInfo</str>
|
||||
<str name="echoParams">explicit</str>
|
||||
<str name="ps">2</str>
|
||||
<str name="qs">2</str>
|
||||
<int name="rows">10</int>
|
||||
<str name="q.alt">*:*</str>
|
||||
<str name="fl">*,score</str>
|
||||
</lst>
|
||||
<!-- In addition to defaults, "appends" params can be specified
|
||||
to identify values which should be appended to the list of
|
||||
|
@ -783,10 +791,12 @@
|
|||
<str name="v.layout">layout</str>
|
||||
<str name="title">Solritas</str>
|
||||
|
||||
<str name="defType">edismax</str>
|
||||
<!-- <str name="defType">edismax</str> -->
|
||||
|
||||
<str name="q.alt">*:*</str>
|
||||
<str name="rows">10</str>
|
||||
<str name="fl">*,score</str>
|
||||
|
||||
<str name="mlt.qf">
|
||||
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
|
||||
</str>
|
||||
|
@ -1395,8 +1405,7 @@
|
|||
</fragmentsBuilder>
|
||||
</highlighting>
|
||||
</searchComponent>
|
||||
|
||||
<!-- Autocomplete -->
|
||||
<!-- Autocomplete -->
|
||||
<!--
|
||||
<searchComponent class="solr.SpellCheckComponent" name="suggest">
|
||||
<lst name="spellchecker">
|
||||
|
@ -1435,7 +1444,6 @@
|
|||
</arr>
|
||||
</requestHandler>
|
||||
-->
|
||||
|
||||
<!-- Update Processors
|
||||
|
||||
Chains of Update Processor Factories for dealing with Update
|
||||
|
|
|
@ -56,3 +56,122 @@ was
|
|||
will
|
||||
with
|
||||
|
||||
# these stopwords are taken
|
||||
# from http://www.onjava.com/pub/a/onjava/2003/01/15/lucene.html?page=2
|
||||
|
||||
about
|
||||
after
|
||||
all
|
||||
also
|
||||
an
|
||||
and
|
||||
another
|
||||
any
|
||||
are
|
||||
as
|
||||
at
|
||||
be
|
||||
because
|
||||
been
|
||||
before
|
||||
being
|
||||
between
|
||||
both
|
||||
but
|
||||
by
|
||||
came
|
||||
can
|
||||
come
|
||||
could
|
||||
did
|
||||
do
|
||||
does
|
||||
each
|
||||
else
|
||||
for
|
||||
from
|
||||
get
|
||||
got
|
||||
has
|
||||
had
|
||||
he
|
||||
have
|
||||
her
|
||||
here
|
||||
him
|
||||
himself
|
||||
his
|
||||
how
|
||||
if
|
||||
in
|
||||
into
|
||||
is
|
||||
it
|
||||
its
|
||||
just
|
||||
like
|
||||
make
|
||||
many
|
||||
me
|
||||
might
|
||||
more
|
||||
most
|
||||
much
|
||||
must
|
||||
my
|
||||
never
|
||||
now
|
||||
of
|
||||
on
|
||||
only
|
||||
or
|
||||
other
|
||||
our
|
||||
out
|
||||
over
|
||||
re
|
||||
said
|
||||
same
|
||||
see
|
||||
should
|
||||
since
|
||||
so
|
||||
some
|
||||
still
|
||||
such
|
||||
take
|
||||
than
|
||||
that
|
||||
the
|
||||
their
|
||||
them
|
||||
then
|
||||
there
|
||||
these
|
||||
they
|
||||
this
|
||||
those
|
||||
through
|
||||
to
|
||||
too
|
||||
under
|
||||
up
|
||||
use
|
||||
very
|
||||
want
|
||||
was
|
||||
way
|
||||
we
|
||||
well
|
||||
were
|
||||
what
|
||||
when
|
||||
where
|
||||
which
|
||||
while
|
||||
who
|
||||
will
|
||||
with
|
||||
would
|
||||
you
|
||||
your
|
||||
|
|
44724
solr/exampleSolr/conf/syn.txt
Normal file
44724
solr/exampleSolr/conf/syn.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -36,7 +36,14 @@ log4j.rootLogger=INFO, AllAppender
|
|||
log4j.logger.edu.cornell.mannlib.vitro.webapp.controller.freemarker.BrowseController=WARN
|
||||
log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.pellet.PelletListener=WARN
|
||||
log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.RDBGraphGenerator=WARN
|
||||
|
||||
#log4j.logger.edu.cornell.mannlib.vitro.webapp.search.solr.IndividualToSolrDocument=DEBUG
|
||||
#log4j.logger.edu.cornell.mannlib.vitro.webapp.search.solr.CalculateParameters=DEBUG
|
||||
#log4j.logger.edu.cornell.mannlib.vitro.webapp.search.solr.ContextNodeFields=DEBUG
|
||||
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder=INFO
|
||||
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder=DEBUG
|
||||
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.indexing.IndexWorkerThread=INFO
|
||||
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.solr.SolrIndexer=INFO
|
||||
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.controller.SolrPagedSearchController=DEBUG
|
||||
# suppress odd warnings from libraries
|
||||
log4j.logger.org.openjena.riot=FATAL
|
||||
log4j.logger.org.directwebremoting=FATAL
|
|
@ -24,6 +24,9 @@ public class DisplayVocabulary {
|
|||
/* Individuals */
|
||||
public static final String PRIMARY_LUCENE_INDEX_URI = NS + "PrimaryLuceneIndex";
|
||||
|
||||
//bk392 for extracting properties beyond context nodes.
|
||||
public static final String CONTEXT_NODES_URI = NS + "QueryForContextNodes";
|
||||
|
||||
/* Page types */
|
||||
public static final String PAGE_TYPE = NS + "Page";
|
||||
public static final String HOME_PAGE_TYPE = NS + "HomePage";
|
||||
|
@ -38,6 +41,8 @@ public class DisplayVocabulary {
|
|||
public static final DatatypeProperty URL_MAPPING = m_model.createDatatypeProperty(NS + "urlMapping");
|
||||
public static final String TITLE = NS + "title";
|
||||
public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate");
|
||||
//bk392 for extracting properties beyond context nodes.
|
||||
public static final DatatypeProperty QUERY_FOR_EDUCATIONAL_TRAINING = m_model.createDatatypeProperty(NS + "queryForEducationalTraining");
|
||||
|
||||
/* URIs for storing menu.n3 */
|
||||
public static final String MENU_TEXT_RES = NS + "MenuText";
|
||||
|
|
|
@ -780,7 +780,7 @@ public class IndividualJena extends IndividualImpl implements Individual {
|
|||
Statement stmt = stmtIt.nextStatement();
|
||||
if (stmt.getObject().isURIResource()) {
|
||||
String typeURI = ((Resource)stmt.getObject()).getURI();
|
||||
if (pfs.isClassProhibited(typeURI)) {
|
||||
if (pfs.isClassProhibitedFromSearch(typeURI)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1069,7 +1069,7 @@ public class IndividualSDB extends IndividualImpl implements Individual {
|
|||
|
||||
while(itr.hasNext()) {
|
||||
String typeURI = itr.next().getURI();
|
||||
if (pfs.isClassProhibited(typeURI)) {
|
||||
if (pfs.isClassProhibitedFromSearch(typeURI)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -324,7 +324,7 @@ public class VClassGroupDaoJena extends JenaBaseDao implements VClassGroupDao {
|
|||
for (VClassGroup group : groups) {
|
||||
List<VClass> classList = new ArrayList<VClass>();
|
||||
for (VClass vclass : group.getVitroClassList()) {
|
||||
if (!pfs.isClassProhibited(vclass.getURI())) {
|
||||
if (!pfs.isClassProhibitedFromSearch(vclass.getURI())) {
|
||||
classList.add(vclass);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
package edu.cornell.mannlib.vitro.webapp.search;
|
||||
|
||||
public class VitroTermNames {
|
||||
/** Id of entity, vclass or tab */
|
||||
public static String URI = "URI";
|
||||
/** lucene document id */
|
||||
public static String DOCID = "DocId";
|
||||
/** java class of the object that the Doc represents. */
|
||||
public static String JCLASS = "JCLASS";
|
||||
/** rdf:type */
|
||||
public static String RDFTYPE = "type";
|
||||
/** rdf:type */
|
||||
public static String CLASSGROUP_URI = "classgroup";
|
||||
/** Modtime from db */
|
||||
public static String MODTIME = "modTime";
|
||||
|
||||
/** time of index in msec since epoc */
|
||||
public static String INDEXEDTIME= "indexedTime";
|
||||
/** timekey of entity in yyyymmddhhmm */
|
||||
public static String TIMEKEY="TIMEKEY";
|
||||
/** time of sunset/end of entity in yyyymmddhhmm */
|
||||
public static String SUNSET="SUNSET";
|
||||
/** time of sunrise/start of entity in yyyymmddhhmm */
|
||||
public static String SUNRISE="SUNRISE";
|
||||
/** entity's moniker */
|
||||
public static String MONIKER="moniker";
|
||||
/** text for 'full text' search, this is stemmed */
|
||||
public static String ALLTEXT = "ALLTEXT";
|
||||
/** text for 'full text' search, this is unstemmed for
|
||||
* use with wildcards and prefix queries */
|
||||
public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
|
||||
/** class name for storing targeted information **/
|
||||
public static final String targetInfo = "targetInfo";
|
||||
/** keywords */
|
||||
public static final String KEYWORDS = "KEYWORDS";
|
||||
/** Does the individual have a thumbnail image? 1=yes 0=no */
|
||||
public static final String THUMBNAIL = "THUMBNAIL";
|
||||
/** Should individual be included in full text search results? 1=yes 0=no */
|
||||
public static final String PROHIBITED_FROM_TEXT_RESULTS = "PROHIBITED_FROM_TEXT_RESULTS";
|
||||
/** class names in human readable form of an individual*/
|
||||
public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase";
|
||||
/** class names in human readable form of an individual*/
|
||||
public static final String CLASSLOCALNAME = "classLocalName";
|
||||
|
||||
// Fields derived from rdfs:label
|
||||
/** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/
|
||||
public static String NAME_RAW = "nameRaw"; // was NAMERAW
|
||||
|
||||
/** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/
|
||||
public static String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE
|
||||
|
||||
/** rdfs:label lowercased, tokenized, stop words, no stemming **/
|
||||
public static String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED
|
||||
|
||||
/** rdfs:label lowercased, tokenized, stop words, stemmed **/
|
||||
public static String NAME_STEMMED = "nameStemmed"; // was NAME
|
||||
|
||||
/** field for beta values of all documents **/
|
||||
public static final String BETA = "BETA";
|
||||
public static final String PHI = "PHI";
|
||||
public static final String ADJACENT_NODES = "ADJACENT_NODES";
|
||||
|
||||
/** adding phonetic field **/
|
||||
public static final String ALLTEXT_PHONETIC = "ALLTEXT_PHONETIC";
|
||||
public static final String NAME_PHONETIC = "NAME_PHONETIC";
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
package edu.cornell.mannlib.vitro.webapp.search.beans;
|
||||
|
||||
/**
 * Answers whether a class, identified by its URI, is prohibited from search.
 */
public interface ClassProhibitedFromSearch {

    /**
     * @param classUri URI of the class to check
     * @return true if the class is prohibited from search
     */
    boolean isClassProhibitedFromSearch(String classUri);
}
|
|
@ -1,67 +1,5 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.beans;
|
||||
|
||||
import javax.servlet.ServletContext;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.hp.hpl.jena.ontology.OntModel;
|
||||
import com.hp.hpl.jena.query.Query;
|
||||
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
||||
import com.hp.hpl.jena.query.QueryFactory;
|
||||
import com.hp.hpl.jena.shared.Lock;
|
||||
import com.hp.hpl.jena.vocabulary.OWL;
|
||||
import com.hp.hpl.jena.vocabulary.RDF;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||
|
||||
public class IndividualProhibitedFromSearch {
|
||||
|
||||
protected OntModel fullModel;
|
||||
|
||||
protected static Log log = LogFactory.getLog(IndividualProhibitedFromSearch.class);
|
||||
|
||||
|
||||
public IndividualProhibitedFromSearch( ServletContext context ){
|
||||
this.fullModel = ModelContext.getUnionOntModelSelector(context).getFullModel();
|
||||
}
|
||||
|
||||
public boolean isIndividualProhibited(String uri){
|
||||
if( uri == null || uri.isEmpty() )
|
||||
return true;
|
||||
|
||||
boolean prohibited = false;
|
||||
try {
|
||||
fullModel.getLock().enterCriticalSection(Lock.READ);
|
||||
Query query = makeAskQueryForUri( uri );
|
||||
prohibited = QueryExecutionFactory.create( query, fullModel).execAsk();
|
||||
} finally {
|
||||
fullModel.getLock().leaveCriticalSection();
|
||||
}
|
||||
if( prohibited )
|
||||
log.debug("prohibited " + uri);
|
||||
|
||||
return prohibited;
|
||||
}
|
||||
|
||||
private Query makeAskQueryForUri( String uri ){
|
||||
String queryString =
|
||||
"PREFIX fn: <http://www.w3.org/2005/xpath-functions#> \n" +
|
||||
"ASK { \n" +
|
||||
" <"+uri+"> <" + RDF.type.getURI() + "> ?type . \n" +
|
||||
" FILTER ( \n" +
|
||||
" ( fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "\" ) \n" +
|
||||
" && \n"+
|
||||
" ! fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "Flag\" ) ) || \n" +
|
||||
" fn:starts-with( str(?type), \"" + VitroVocabulary.PUBLIC + "\" ) || \n" +
|
||||
" str(?type) = \"" + OWL.ObjectProperty.getURI() + "\" || \n" +
|
||||
" str(?type) = \"" + OWL.DatatypeProperty.getURI() + "\" || \n" +
|
||||
" str(?type) = \"" + OWL.AnnotationProperty.getURI() + "\" \n" +
|
||||
" )\n" +
|
||||
"}" ;
|
||||
return QueryFactory.create( queryString );
|
||||
}
|
||||
public interface IndividualProhibitedFromSearch {
|
||||
public boolean isIndividualProhibited(String uri);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.beans;
|
||||
|
||||
import javax.servlet.ServletContext;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.hp.hpl.jena.ontology.OntModel;
|
||||
import com.hp.hpl.jena.query.Query;
|
||||
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
||||
import com.hp.hpl.jena.query.QueryFactory;
|
||||
import com.hp.hpl.jena.shared.Lock;
|
||||
import com.hp.hpl.jena.vocabulary.OWL;
|
||||
import com.hp.hpl.jena.vocabulary.RDF;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||
|
||||
public class IndividualProhibitedFromSearchImpl implements IndividualProhibitedFromSearch {
|
||||
|
||||
protected OntModel fullModel;
|
||||
|
||||
protected static Log log = LogFactory.getLog(IndividualProhibitedFromSearchImpl.class);
|
||||
|
||||
public IndividualProhibitedFromSearchImpl( ServletContext context ){
|
||||
this.fullModel = ModelContext.getUnionOntModelSelector(context).getFullModel();
|
||||
}
|
||||
|
||||
public IndividualProhibitedFromSearchImpl( OntModel fullModel ){
|
||||
this.fullModel = fullModel;
|
||||
}
|
||||
|
||||
public boolean isIndividualProhibited(String uri){
|
||||
if( uri == null || uri.isEmpty() )
|
||||
return true;
|
||||
|
||||
boolean prohibited = false;
|
||||
try {
|
||||
fullModel.getLock().enterCriticalSection(Lock.READ);
|
||||
Query query = makeAskQueryForUri( uri );
|
||||
prohibited = QueryExecutionFactory.create( query, fullModel).execAsk();
|
||||
} finally {
|
||||
fullModel.getLock().leaveCriticalSection();
|
||||
}
|
||||
if( prohibited )
|
||||
log.debug("prohibited " + uri);
|
||||
|
||||
return prohibited;
|
||||
}
|
||||
|
||||
private Query makeAskQueryForUri( String uri ){
|
||||
String queryString =
|
||||
"PREFIX fn: <http://www.w3.org/2005/xpath-functions#> \n" +
|
||||
"ASK { \n" +
|
||||
" <"+uri+"> <" + RDF.type.getURI() + "> ?type . \n" +
|
||||
" FILTER ( \n" +
|
||||
" ( fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "\" ) \n" +
|
||||
" && \n"+
|
||||
" ! fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "Flag\" ) ) || \n" +
|
||||
" fn:starts-with( str(?type), \"" + VitroVocabulary.PUBLIC + "\" ) || \n" +
|
||||
" str(?type) = \"" + OWL.ObjectProperty.getURI() + "\" || \n" +
|
||||
" str(?type) = \"" + OWL.DatatypeProperty.getURI() + "\" || \n" +
|
||||
" str(?type) = \"" + OWL.AnnotationProperty.getURI() + "\" \n" +
|
||||
" )\n" +
|
||||
"}" ;
|
||||
return QueryFactory.create( queryString );
|
||||
}
|
||||
}
|
|
@ -25,7 +25,7 @@ import com.hp.hpl.jena.shared.Lock;
|
|||
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
|
||||
|
||||
public class ProhibitedFromSearch {
|
||||
public class ProhibitedFromSearch implements ClassProhibitedFromSearch{
|
||||
List<String> prohibitedClasses;
|
||||
String ProhibitedFromSearchURI;
|
||||
|
||||
|
@ -41,7 +41,7 @@ public class ProhibitedFromSearch {
|
|||
model.register(new ProhibitedFromSearchChangeListener( this ));
|
||||
}
|
||||
|
||||
public synchronized boolean isClassProhibited(String classURI){
|
||||
public synchronized boolean isClassProhibitedFromSearch(String classURI){
|
||||
if( classURI != null ){
|
||||
boolean p = prohibitedClasses.contains(classURI);
|
||||
log.debug( classURI + " is " + (p?"prohibited":"not prohibited"));
|
||||
|
|
|
@ -49,6 +49,7 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.VitroHighlighter;
|
|||
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
|
||||
import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel;
|
||||
|
@ -178,6 +179,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
|
|||
SolrServer solr = SolrSetup.getSolrServer(getServletContext());
|
||||
QueryResponse response = null;
|
||||
|
||||
|
||||
try {
|
||||
response = solr.query(query);
|
||||
|
||||
|
@ -349,11 +351,11 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
|
|||
for(int i=0; i<hitCount && n > grpsFound ;i++){
|
||||
try{
|
||||
SolrDocument doc = docs.get(i);
|
||||
Collection<Object> grps = doc.getFieldValues(VitroLuceneTermNames.CLASSGROUP_URI);
|
||||
Collection<Object> grps = doc.getFieldValues(VitroTermNames.CLASSGROUP_URI);
|
||||
if (grps != null) {
|
||||
for (Object o : grps) {
|
||||
String groupUri = o.toString();
|
||||
if( groupUri != null && ! classGroupsInHits.contains(groupUri)){
|
||||
if( groupUri != null && !classGroupsInHits.contains(groupUri)){
|
||||
classGroupsInHits.add(groupUri);
|
||||
grpsFound++;
|
||||
if( grpsFound >= n )
|
||||
|
@ -364,6 +366,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
|
|||
} catch(Exception e) {
|
||||
log.error("problem getting VClassGroups from search hits "
|
||||
+ e.getMessage() );
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -9,8 +9,11 @@ import java.util.HashSet;
|
|||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Stack;
|
||||
import java.util.Queue;
|
||||
|
||||
import javax.servlet.ServletContext;
|
||||
import org.apache.solr.client.solrj.SolrServer;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
@ -21,6 +24,8 @@ import edu.cornell.mannlib.vitro.webapp.dao.VClassDao;
|
|||
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.solr.CalculateParameters;
|
||||
|
||||
|
||||
/**
|
||||
* The IndexBuilder is used to rebuild or update a search index.
|
||||
|
@ -282,8 +287,29 @@ public class IndexBuilder extends Thread {
|
|||
* @throws AbortIndexing
|
||||
*/
|
||||
private void indexForSource(Iterator<Individual> individuals , boolean newDocs) throws AbortIndexing{
|
||||
long starttime = System.currentTimeMillis();
|
||||
long count = 0;
|
||||
|
||||
|
||||
// long starttime = System.currentTimeMillis();
|
||||
int count = 0;
|
||||
int numOfThreads = 10;
|
||||
|
||||
|
||||
List<IndexWorkerThread> workers = new ArrayList<IndexWorkerThread>();
|
||||
boolean distributing = true;
|
||||
|
||||
for(int i = 0; i< numOfThreads ;i++){
|
||||
workers.add(new IndexWorkerThread(indexer,i,distributing)); // made a pool of workers
|
||||
}
|
||||
|
||||
log.info("Indexing worker pool ready for indexing.");
|
||||
|
||||
// starting worker threads
|
||||
|
||||
for(int i =0; i < numOfThreads; i++){
|
||||
workers.get(i).start();
|
||||
}
|
||||
|
||||
|
||||
while(individuals.hasNext()){
|
||||
if( stopRequested )
|
||||
throw new AbortIndexing();
|
||||
|
@ -291,7 +317,11 @@ public class IndexBuilder extends Thread {
|
|||
Individual ind = null;
|
||||
try{
|
||||
ind = individuals.next();
|
||||
indexer.index(ind, newDocs);
|
||||
|
||||
//indexer.index(ind);
|
||||
|
||||
workers.get(count%numOfThreads).addToQueue(ind); // adding individual to worker queue.
|
||||
|
||||
}catch(Throwable ex){
|
||||
if( stopRequested || log == null){//log might be null if system is shutting down.
|
||||
throw new AbortIndexing();
|
||||
|
@ -300,21 +330,34 @@ public class IndexBuilder extends Thread {
|
|||
log.warn("Error indexing individual " + uri + " " + ex.getMessage());
|
||||
}
|
||||
count++;
|
||||
if( log.isDebugEnabled() ){
|
||||
/* if( log.isDebugEnabled() ){
|
||||
if( (count % 100 ) == 0 && count > 0 ){
|
||||
long dt = (System.currentTimeMillis() - starttime);
|
||||
log.debug("individuals indexed: " + count + " in " + dt + " msec " +
|
||||
" time pre individual = " + (dt / count) + " msec" );
|
||||
}
|
||||
} */
|
||||
}
|
||||
|
||||
for(int i =0 ; i < numOfThreads; i ++){
|
||||
workers.get(i).setDistributing(false);
|
||||
}
|
||||
for(int i =0; i < numOfThreads; i++){
|
||||
try{
|
||||
workers.get(i).join();
|
||||
}catch(InterruptedException e){
|
||||
log.error(e,e);
|
||||
}
|
||||
}
|
||||
|
||||
log.info(
|
||||
/* log.info(
|
||||
"individuals indexed: " + count + " in " + (System.currentTimeMillis() - starttime) + " msec" +
|
||||
(count!=0?(" time per individual = " + (System.currentTimeMillis() - starttime)/ count + " msec"):"")
|
||||
);
|
||||
);*/
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* For a list of individuals, this builds a list of dependent resources and returns it.
|
||||
*/
|
||||
|
@ -388,4 +431,6 @@ public class IndexBuilder extends Thread {
|
|||
private class AbortIndexing extends Exception {
|
||||
// Just a vanilla exception
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
package edu.cornell.mannlib.vitro.webapp.search.indexing;
|
||||
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.Queue;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.solr.IndividualToSolrDocument;
|
||||
|
||||
|
||||
|
||||
class IndexWorkerThread extends Thread{
|
||||
|
||||
protected IndividualToSolrDocument individualToSolrDoc;
|
||||
private IndexerIface indexer = null;
|
||||
private Log log = LogFactory.getLog(IndexWorkerThread.class);
|
||||
private static long count=0;
|
||||
private Queue<Individual> indQueue = new LinkedList<Individual>();
|
||||
private int threadNum;
|
||||
private static long starttime = 0;
|
||||
private boolean distributing;
|
||||
|
||||
public IndexWorkerThread(IndexerIface indexer, int threadNum,boolean distributing){
|
||||
this.indexer = indexer;
|
||||
this.threadNum = threadNum;
|
||||
this.distributing = distributing;
|
||||
synchronized(this){
|
||||
if(starttime == 0)
|
||||
starttime = System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
|
||||
public void addToQueue(Individual ind){
|
||||
synchronized(indQueue){
|
||||
indQueue.offer(ind);
|
||||
indQueue.notify();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isQueueEmpty(){
|
||||
return indQueue.isEmpty();
|
||||
}
|
||||
|
||||
public void setDistributing(boolean distributing){
|
||||
this.distributing = distributing;
|
||||
}
|
||||
|
||||
public void run(){
|
||||
|
||||
while(this.distributing){
|
||||
synchronized(indQueue){
|
||||
try{
|
||||
while(indQueue.isEmpty() && this.distributing){
|
||||
try{
|
||||
log.debug("Worker number " + threadNum + " waiting on some work to be alloted.");
|
||||
indQueue.wait(1000);
|
||||
}catch(InterruptedException ie){
|
||||
log.error(ie,ie);
|
||||
}
|
||||
}
|
||||
|
||||
Thread.sleep(50); //wait a bit to let a bit more work to come into the queue
|
||||
log.debug("work found for Woker number " + threadNum);
|
||||
addDocsToIndex();
|
||||
|
||||
} catch (InterruptedException e) {
|
||||
log.debug("Worker number " + threadNum + " woken up",e);
|
||||
}
|
||||
catch(Throwable e){
|
||||
log.error(e,e);
|
||||
}
|
||||
}
|
||||
}
|
||||
log.info("Worker number " + threadNum + " exiting.");
|
||||
}
|
||||
|
||||
protected void addDocsToIndex() throws IndexingException{
|
||||
|
||||
while(!indQueue.isEmpty()){
|
||||
indexer.index(indQueue.poll());
|
||||
synchronized(this){
|
||||
count++;
|
||||
if( log.isInfoEnabled() ){
|
||||
if( (count % 100 ) == 0 && count > 0 ){
|
||||
long dt = (System.currentTimeMillis() - starttime);
|
||||
log.info("individuals indexed: " + count + " in " + dt + " msec " +
|
||||
" time per individual = " + (dt / count) + " msec" );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -38,7 +38,7 @@ public interface IndexerIface {
|
|||
* @param newDoc - if true, just insert doc, if false attempt to update.
|
||||
* @throws IndexingException
|
||||
*/
|
||||
public void index(Individual ind, boolean newDoc)throws IndexingException;
|
||||
public void index(Individual ind)throws IndexingException;
|
||||
|
||||
|
||||
/**
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -21,6 +23,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
|
|||
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||
|
@ -31,6 +34,7 @@ import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
|||
* be as full as possible.
|
||||
*/
|
||||
public class Entity2LuceneDoc implements Obj2DocIface{
|
||||
|
||||
/** These are the terms for the lucene index */
|
||||
public static class VitroLuceneTermNames{
|
||||
/** Id of entity, vclass or tab */
|
||||
|
@ -61,6 +65,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
/** text for 'full text' search, this is unstemmed for
|
||||
* use with wildcards and prefix queries */
|
||||
public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
|
||||
/** class name for storing context nodes **/
|
||||
public static final String CONTEXTNODE = "contextNode";
|
||||
/** keywords */
|
||||
public static final String KEYWORDS = "KEYWORDS";
|
||||
/** Does the individual have a thumbnail image? 1=yes 0=no */
|
||||
|
@ -108,6 +114,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
|
||||
private IndividualProhibitedFromSearch individualProhibited;
|
||||
|
||||
private static HashMap<String, String> IndividualURIToObjectProperties = new HashMap<String, String>();
|
||||
|
||||
private static HashSet<String> objectProperties = new HashSet<String>();
|
||||
|
||||
public Entity2LuceneDoc(
|
||||
ProhibitedFromSearch classesProhibitedFromSearch,
|
||||
IndividualProhibitedFromSearch individualProhibited){
|
||||
|
@ -128,7 +138,6 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
Document doc = new Document();
|
||||
String classPublicNames = "";
|
||||
|
||||
|
||||
//DocId
|
||||
String id = ent.getURI();
|
||||
log.debug("translating " + id);
|
||||
|
@ -162,7 +171,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
log.debug("not indexing " + id + " because of type " + clz.getURI());
|
||||
return null;
|
||||
}else{
|
||||
if( !prohibited && classesProhibitedFromSearch.isClassProhibited(clz.getURI()) )
|
||||
if( !prohibited && classesProhibitedFromSearch.isClassProhibitedFromSearch(clz.getURI()) )
|
||||
prohibited = true;
|
||||
|
||||
if( clz.getSearchBoost() != null )
|
||||
|
@ -197,9 +206,11 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
doc.add( new Field(term.DOCID, entClassName + id,
|
||||
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||
|
||||
|
||||
//vitro Id
|
||||
doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||
|
||||
|
||||
//java class
|
||||
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||
|
||||
|
@ -212,7 +223,6 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
log.debug("Using local name for individual with rdfs:label " + ent.getURI());
|
||||
value = ent.getLocalName();
|
||||
}
|
||||
|
||||
Field nameRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||
nameRaw.setBoost(NAME_BOOST);
|
||||
doc.add(nameRaw);
|
||||
|
@ -230,6 +240,20 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
nameStemmed.setBoost(NAME_BOOST);
|
||||
doc.add(nameStemmed);
|
||||
|
||||
String contextNodePropertyValues;
|
||||
|
||||
// if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
|
||||
/*contextNodePropertyValues = searchQueryHandler.getPropertiesAssociatedWithEducationalTraining(ent.getURI());
|
||||
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRole(ent.getURI());
|
||||
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithPosition(ent.getURI());
|
||||
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRelationship(ent.getURI());
|
||||
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithAwardReceipt(ent.getURI());
|
||||
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithInformationResource(ent.getURI()); */
|
||||
|
||||
// }
|
||||
|
||||
/* Field contextNodeInformation = new Field(term.CONTEXTNODE, contextNodePropertyValues, Field.Store.YES, Field.Index.ANALYZED );
|
||||
doc.add(contextNodeInformation);*/
|
||||
|
||||
//Moniker
|
||||
|
||||
|
@ -279,6 +303,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
|
||||
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||
|
||||
|
||||
if( ! prohibited ){
|
||||
//ALLTEXT, all of the 'full text'
|
||||
String t=null;
|
||||
|
@ -310,6 +335,12 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
continue;
|
||||
try {
|
||||
value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t );
|
||||
|
||||
if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
|
||||
//IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) );
|
||||
objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ));
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
log.debug("could not index name of related object: " + e.getMessage());
|
||||
}
|
||||
|
@ -321,6 +352,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
|
||||
}
|
||||
|
||||
|
||||
// log.info("\n IndividualURItoObjectProperties " + IndividualURIToObjectProperties.toString() + " \n\n");
|
||||
log.info(" \n Object Properties " + objectProperties.toString() + "\n\n");
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
|
|
@ -474,4 +474,10 @@ public class LuceneIndexer implements IndexerIface {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Single-argument indexing entry point.
 * NOTE(review): the body is empty, so a call to this method indexes nothing
 * and reports no error. Presumably it should delegate to this class's real
 * indexing logic; confirm against the rest of LuceneIndexer.
 */
@Override
|
||||
public void index(Individual ind) throws IndexingException {
|
||||
// TODO: not implemented yet; this method currently does nothing.
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
|
|||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CONTEXTNODE;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_UNSTEMMED;
|
||||
|
@ -38,7 +39,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
|
|||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||
|
@ -111,10 +112,15 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
getAnalyzer());
|
||||
context.setAttribute(ANALYZER, getAnalyzer());
|
||||
|
||||
//bk392 adding another argument to Entity2LuceneDoc
|
||||
// that takes care of sparql queries for context nodes.
|
||||
|
||||
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
||||
Entity2LuceneDoc translator = new Entity2LuceneDoc(
|
||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||
new IndividualProhibitedFromSearch(context) );
|
||||
new IndividualProhibitedFromSearchImpl(context)
|
||||
|
||||
);
|
||||
indexer.addObj2Doc(translator);
|
||||
|
||||
context.setAttribute(LuceneIndexer.class.getName(), indexer);
|
||||
|
@ -250,9 +256,11 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
analyzer.addAnalyzer(NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
|
||||
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(CONTEXTNODE, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
|
||||
analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer());
|
||||
|
||||
|
||||
return analyzer;
|
||||
}
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
|
|||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||
|
||||
|
@ -92,7 +92,8 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
|
|||
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
||||
Entity2LuceneDoc translator = new Entity2LuceneDoc(
|
||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||
new IndividualProhibitedFromSearch(context) );
|
||||
new IndividualProhibitedFromSearchImpl(context)
|
||||
);
|
||||
indexer.addObj2Doc(translator);
|
||||
|
||||
indexer.setLuceneIndexFactory(lif);
|
||||
|
|
|
@ -0,0 +1,347 @@
|
|||
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Hashtable;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
|
||||
import com.hp.hpl.jena.query.Dataset;
|
||||
import com.hp.hpl.jena.query.Query;
|
||||
import com.hp.hpl.jena.query.QueryExecution;
|
||||
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
||||
import com.hp.hpl.jena.query.QueryFactory;
|
||||
import com.hp.hpl.jena.query.QuerySolution;
|
||||
import com.hp.hpl.jena.query.QuerySolutionMap;
|
||||
import com.hp.hpl.jena.query.ResultSet;
|
||||
import com.hp.hpl.jena.query.Syntax;
|
||||
import com.hp.hpl.jena.rdf.model.Model;
|
||||
import com.hp.hpl.jena.rdf.model.Property;
|
||||
import com.hp.hpl.jena.rdf.model.RDFNode;
|
||||
import com.hp.hpl.jena.rdf.model.Resource;
|
||||
import com.hp.hpl.jena.rdf.model.ResourceFactory;
|
||||
import com.hp.hpl.jena.rdf.model.StmtIterator;
|
||||
import com.hp.hpl.jena.shared.Lock;
|
||||
import com.hp.hpl.jena.ontology.OntModel;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
|
||||
|
||||
|
||||
public class CalculateParameters implements DocumentModifier {
|
||||
|
||||
private Dataset dataset;
|
||||
public static int totalInd=1;
|
||||
protected Map<String,Float> betaMap = new Hashtable<String,Float>();
|
||||
private static final String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
private static final String betaQuery = prefix + " SELECT count(distinct ?inLinks) " +
|
||||
" WHERE { " +
|
||||
" ?uri rdf:type owl:Thing . " +
|
||||
" ?inLinks ?prop ?uri . " +
|
||||
" } ";
|
||||
|
||||
private static final String totalCountQuery = prefix + " SELECT count(distinct ?ind) " +
|
||||
" WHERE { " +
|
||||
" ?ind rdf:type owl:Thing . " +
|
||||
" } ";
|
||||
|
||||
private static Log log = LogFactory.getLog(CalculateParameters.class);
|
||||
|
||||
private static final String[] fieldsToAddBetaTo = {
|
||||
VitroTermNames.NAME_RAW,
|
||||
VitroTermNames.NAME_LOWERCASE,
|
||||
VitroTermNames.NAME_UNSTEMMED,
|
||||
VitroTermNames.NAME_STEMMED
|
||||
};
|
||||
|
||||
private static final String[] fieldsToMultiplyBetaBy = {
|
||||
VitroTermNames.ALLTEXT,
|
||||
VitroTermNames.ALLTEXTUNSTEMMED,
|
||||
};
|
||||
|
||||
public CalculateParameters(Dataset dataset){
|
||||
this.dataset =dataset;
|
||||
new Thread(new TotalInd(this.dataset,totalCountQuery)).start();
|
||||
}
|
||||
|
||||
/**
 * Creates an instance without assigning a dataset and without starting the
 * background count of individuals.
 * NOTE(review): calculateBeta and getAdjacentNodes call dataset.getLock(),
 * so they would fail on an instance built this way unless the dataset is
 * supplied by other means; confirm how this constructor is used.
 */
public CalculateParameters(){
|
||||
super();
|
||||
}
|
||||
|
||||
public float calculateBeta(String uri){
|
||||
float beta=0;
|
||||
int Conn=0;
|
||||
Query query;
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
QuerySolution soln = null;
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
initialBinding.add("uri", uriResource);
|
||||
dataset.getLock().enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
query = QueryFactory.create(betaQuery,Syntax.syntaxARQ);
|
||||
QueryExecution qexec = QueryExecutionFactory.create(query,dataset,initialBinding);
|
||||
ResultSet results = qexec.execSelect();
|
||||
List<String> resultVars = results.getResultVars();
|
||||
if(resultVars!=null && resultVars.size()!=0){
|
||||
soln = results.next();
|
||||
Conn = Integer.parseInt(soln.getLiteral(resultVars.get(0)).getLexicalForm());
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
}finally{
|
||||
dataset.getLock().leaveCriticalSection();
|
||||
}
|
||||
|
||||
beta = (float)Conn/totalInd;
|
||||
beta *= 100;
|
||||
beta += 1;
|
||||
return beta;
|
||||
}
|
||||
|
||||
public float calculatePhi(StringBuffer adjNodes){
|
||||
|
||||
StringTokenizer nodes = new StringTokenizer(adjNodes.toString()," ");
|
||||
String uri=null;
|
||||
int size=0;
|
||||
float phi = 0.1F;
|
||||
while(nodes.hasMoreTokens()){
|
||||
size++;
|
||||
uri = nodes.nextToken();
|
||||
phi += getBeta(uri);
|
||||
}
|
||||
if(size>0)
|
||||
phi = (float)phi/size;
|
||||
else
|
||||
phi = 1;
|
||||
return phi;
|
||||
}
|
||||
|
||||
public synchronized Float getBeta(String uri){
|
||||
|
||||
float beta;
|
||||
if(betaMap.containsKey(uri)){
|
||||
beta = betaMap.get(uri);
|
||||
}else{
|
||||
beta = calculateBeta(uri); // or calculate & put in map
|
||||
betaMap.put(uri, beta);
|
||||
}
|
||||
return beta;
|
||||
|
||||
}
|
||||
|
||||
|
||||
public String[] getAdjacentNodes(String uri){
|
||||
|
||||
List<String> queryList = new ArrayList<String>();
|
||||
Set<String> adjacentNodes = new HashSet<String>();
|
||||
Set<String> coauthorNames = new HashSet<String>();
|
||||
String[] info = new String[]{"",""};
|
||||
StringBuffer adjacentNodesConcat = new StringBuffer();
|
||||
StringBuffer coauthorBuff = new StringBuffer();
|
||||
adjacentNodesConcat.append("");
|
||||
coauthorBuff.append("");
|
||||
|
||||
queryList.add(prefix +
|
||||
" SELECT ?adjobj (str(?adjobjLabel) as ?coauthor) " +
|
||||
" WHERE { " +
|
||||
" ?uri rdf:type <http://xmlns.com/foaf/0.1/Person> . " +
|
||||
" ?uri ?prop ?obj . " +
|
||||
" ?obj rdf:type <http://vivoweb.org/ontology/core#Relationship> . " +
|
||||
" ?obj ?prop2 ?obj2 . " +
|
||||
" ?obj2 rdf:type <http://vivoweb.org/ontology/core#InformationResource> . " +
|
||||
" ?obj2 ?prop3 ?obj3 . " +
|
||||
" ?obj3 rdf:type <http://vivoweb.org/ontology/core#Relationship> . " +
|
||||
" ?obj3 ?prop4 ?adjobj . " +
|
||||
" ?adjobj rdfs:label ?adjobjLabel . " +
|
||||
" ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Person> . " +
|
||||
|
||||
" FILTER (?prop !=rdf:type) . " +
|
||||
" FILTER (?prop2!=rdf:type) . " +
|
||||
" FILTER (?prop3!=rdf:type) . " +
|
||||
" FILTER (?prop4!=rdf:type) . " +
|
||||
" FILTER (?adjobj != ?uri) . " +
|
||||
"}");
|
||||
|
||||
queryList.add(prefix +
|
||||
" SELECT ?adjobj " +
|
||||
" WHERE{ " +
|
||||
|
||||
" ?uri rdf:type foaf:Agent . " +
|
||||
" ?uri ?prop ?obj . " +
|
||||
" ?obj ?prop2 ?adjobj . " +
|
||||
|
||||
|
||||
" FILTER (?prop !=rdf:type) . " +
|
||||
" FILTER isURI(?obj) . " +
|
||||
|
||||
" FILTER (?prop2!=rdf:type) . " +
|
||||
" FILTER (?adjobj != ?uri) . " +
|
||||
" FILTER isURI(?adjobj) . " +
|
||||
|
||||
" { ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Organization> . } " +
|
||||
" UNION " +
|
||||
" { ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Person> . } " +
|
||||
" UNION " +
|
||||
" { ?adjobj rdf:type <http://vivoweb.org/ontology/core#InformationResource> . } " +
|
||||
" UNION " +
|
||||
" { ?adjobj rdf:type <http://vivoweb.org/ontology/core#Location> . } ." +
|
||||
"}");
|
||||
|
||||
Query query;
|
||||
|
||||
QuerySolution soln;
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
Iterator<String> queryItr = queryList.iterator();
|
||||
|
||||
dataset.getLock().enterCriticalSection(Lock.READ);
|
||||
Resource adjacentIndividual = null;
|
||||
RDFNode coauthor = null;
|
||||
try{
|
||||
while(queryItr.hasNext()){
|
||||
/*if(!isPerson){
|
||||
queryItr.next(); // we don't want first query to execute if the ind is not a person.
|
||||
}*/
|
||||
query = QueryFactory.create(queryItr.next(),Syntax.syntaxARQ);
|
||||
QueryExecution qexec = QueryExecutionFactory.create(query,dataset,initialBinding);
|
||||
try{
|
||||
ResultSet results = qexec.execSelect();
|
||||
while(results.hasNext()){
|
||||
soln = results.nextSolution();
|
||||
|
||||
adjacentIndividual = (Resource)soln.get("adjobj");
|
||||
if(adjacentIndividual!=null){
|
||||
adjacentNodes.add(adjacentIndividual.getURI());
|
||||
}
|
||||
|
||||
coauthor = soln.get("coauthor");
|
||||
if(coauthor!=null){
|
||||
coauthorNames.add(" co-authors " + coauthor.toString() + " co-authors ");
|
||||
}
|
||||
}
|
||||
}catch(Exception e){
|
||||
log.error("Error found in getAdjacentNodes method of SearchQueryHandler");
|
||||
}finally{
|
||||
qexec.close();
|
||||
}
|
||||
}
|
||||
queryList = null;
|
||||
Iterator<String> itr = adjacentNodes.iterator();
|
||||
while(itr.hasNext()){
|
||||
adjacentNodesConcat.append(itr.next() + " ");
|
||||
}
|
||||
|
||||
info[0] = adjacentNodesConcat.toString();
|
||||
|
||||
itr = coauthorNames.iterator();
|
||||
while(itr.hasNext()){
|
||||
coauthorBuff.append(itr.next());
|
||||
}
|
||||
|
||||
info[1] = coauthorBuff.toString();
|
||||
|
||||
}
|
||||
catch(Throwable t){
|
||||
log.error(t,t);
|
||||
}finally{
|
||||
dataset.getLock().leaveCriticalSection();
|
||||
adjacentNodes = null;
|
||||
adjacentNodesConcat = null;
|
||||
coauthorBuff = null;
|
||||
}
|
||||
return info;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri) {
|
||||
// TODO Auto-generated method stub
|
||||
// calculate beta value.
|
||||
log.debug("Parameter calculation starts..");
|
||||
|
||||
String uri = individual.getURI();
|
||||
String adjInfo[] = getAdjacentNodes(uri);
|
||||
StringBuffer info = new StringBuffer();
|
||||
info.append(adjInfo[0]);
|
||||
info.append(addUri.toString());
|
||||
float phi = calculatePhi(info);
|
||||
|
||||
for(String term: fieldsToAddBetaTo){
|
||||
SolrInputField f = doc.getField( term );
|
||||
f.setBoost( getBeta(uri) + phi + IndividualToSolrDocument.NAME_BOOST);
|
||||
}
|
||||
|
||||
for(String term: fieldsToMultiplyBetaBy){
|
||||
SolrInputField f = doc.getField( term );
|
||||
f.addValue(info.toString(),getBeta(uri)*phi*IndividualToSolrDocument.ALL_TEXT_BOOST);
|
||||
}
|
||||
|
||||
SolrInputField f = doc.getField(VitroTermNames.targetInfo);
|
||||
f.addValue(adjInfo[1],f.getBoost());
|
||||
doc.setDocumentBoost(getBeta(uri)*phi*IndividualToSolrDocument.ALL_TEXT_BOOST);
|
||||
|
||||
log.debug("Parameter calculation is done");
|
||||
}
|
||||
|
||||
/**
 * Removes every entry from betaMap, so later getBeta calls recalculate the
 * beta values instead of returning the stored ones.
 */
public void clearMap(){
|
||||
betaMap.clear();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class TotalInd implements Runnable{
|
||||
private Dataset dataset;
|
||||
private String totalCountQuery;
|
||||
private static Log log = LogFactory.getLog(TotalInd.class);
|
||||
|
||||
public TotalInd(Dataset dataset,String totalCountQuery){
|
||||
this.dataset = dataset;
|
||||
this.totalCountQuery = totalCountQuery;
|
||||
|
||||
}
|
||||
public void run(){
|
||||
int totalInd=0;
|
||||
Query query;
|
||||
QuerySolution soln = null;
|
||||
dataset.getLock().enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
query = QueryFactory.create(totalCountQuery,Syntax.syntaxARQ);
|
||||
QueryExecution qexec = QueryExecutionFactory.create(query,dataset);
|
||||
ResultSet results = qexec.execSelect();
|
||||
List<String> resultVars = results.getResultVars();
|
||||
|
||||
if(resultVars!=null && resultVars.size()!=0){
|
||||
soln = results.next();
|
||||
totalInd = Integer.parseInt(soln.getLiteral(resultVars.get(0)).getLexicalForm());
|
||||
}
|
||||
CalculateParameters.totalInd = totalInd;
|
||||
log.info("Total number of individuals in the system are : " + CalculateParameters.totalInd);
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
}finally{
|
||||
dataset.getLock().leaveCriticalSection();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,452 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
|
||||
import com.hp.hpl.jena.query.Dataset;
|
||||
import com.hp.hpl.jena.query.Query;
|
||||
import com.hp.hpl.jena.query.QueryExecution;
|
||||
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
||||
import com.hp.hpl.jena.query.QueryFactory;
|
||||
import com.hp.hpl.jena.query.QuerySolution;
|
||||
import com.hp.hpl.jena.query.QuerySolutionMap;
|
||||
import com.hp.hpl.jena.query.ResultSet;
|
||||
import com.hp.hpl.jena.query.Syntax;
|
||||
import com.hp.hpl.jena.rdf.model.RDFNode;
|
||||
import com.hp.hpl.jena.rdf.model.Resource;
|
||||
import com.hp.hpl.jena.rdf.model.ResourceFactory;
|
||||
import com.hp.hpl.jena.shared.Lock;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
|
||||
|
||||
public class ContextNodeFields implements DocumentModifier{
|
||||
|
||||
private static final String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
private static final List<String> singleValuedQueriesForAgent = new ArrayList<String>();
|
||||
private static final List<String> singleValuedQueriesForInformationResource = new ArrayList<String>();
|
||||
private static final List<String> multiValuedQueriesForAgent = new ArrayList<String>();
|
||||
private static final String multiValuedQueryForInformationResource;
|
||||
// private static StringBuffer objectProperties = new StringBuffer();
|
||||
|
||||
private Log log = LogFactory.getLog(ContextNodeFields.class);
|
||||
private Dataset dataset;
|
||||
|
||||
|
||||
/**
 * @param dataset dataset that runQuery locks and executes its SPARQL queries against
 */
public ContextNodeFields(Dataset dataset){
|
||||
this.dataset = dataset;
|
||||
}
|
||||
|
||||
/* TODO: consider a constructor like this:
|
||||
* public ContextNodeFields(OntModel fullModel, List<String> queries )
|
||||
*/
|
||||
|
||||
/*
|
||||
*TODO:
|
||||
* consider reducing the code in this class using a method like the following:
|
||||
*/
|
||||
public StringBuffer runQuery( Individual individual, String query ){
|
||||
StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(individual.getURI());
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
Query sparqlQuery = QueryFactory.create( query, Syntax.syntaxARQ);
|
||||
dataset.getLock().enterCriticalSection(Lock.READ);
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, dataset, initialBinding);
|
||||
try{
|
||||
ResultSet results = qExec.execSelect();
|
||||
while(results.hasNext()){
|
||||
QuerySolution soln = results.nextSolution();
|
||||
Iterator<String> iter = soln.varNames() ;
|
||||
while( iter.hasNext()){
|
||||
String name = iter.next();
|
||||
RDFNode node = soln.get( name );
|
||||
if( node != null ){
|
||||
propertyValues.append(" " + node.toString());
|
||||
}else{
|
||||
log.debug(name + " is null");
|
||||
}
|
||||
}
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
dataset.getLock().leaveCriticalSection();
|
||||
}
|
||||
|
||||
return propertyValues;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri) {
|
||||
|
||||
log.debug("retrieving context node values..");
|
||||
|
||||
SolrInputField field = doc.getField(VitroTermNames.ALLTEXT);
|
||||
SolrInputField targetField = doc.getField(VitroTermNames.targetInfo);
|
||||
StringBuffer objectProperties = new StringBuffer();
|
||||
|
||||
|
||||
objectProperties.append(" ");
|
||||
|
||||
int threadCount = multiValuedQueriesForAgent.size();
|
||||
QueryRunner[] threads = new QueryRunner[threadCount];
|
||||
|
||||
|
||||
//Make a thread for each query and start it.
|
||||
for(int i= 0; i < threadCount; i++){
|
||||
QueryRunner t = new QueryRunner(individual, multiValuedQueriesForAgent.get(i));
|
||||
t.start();
|
||||
threads[i] = t;
|
||||
}
|
||||
|
||||
//Wait for each thread to finish and collect results
|
||||
for(int i = 0 ; i < threadCount ; i++){
|
||||
try {
|
||||
threads[i].join();
|
||||
objectProperties.append( threads[i].getPropertyValues() ) ;
|
||||
threads[i] = null;
|
||||
} catch (InterruptedException e) {
|
||||
log.error("Thread " + threads[i].getName() + " interrupted!");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
targetField.addValue(" " + runQuery(individual, multiValuedQueryForInformationResource), targetField.getBoost());
|
||||
|
||||
|
||||
field.addValue(objectProperties, field.getBoost());
|
||||
log.debug("context node values are retrieved");
|
||||
|
||||
|
||||
}
|
||||
|
||||
//single valued queries for foaf:Agent
|
||||
static {
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||
" ?c rdf:type core:Position . " +
|
||||
" ?c core:hrJobTitle ?ContextNodeProperty . }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||
" ?c rdf:type core:Position . " +
|
||||
" ?c core:involvedOrganizationName ?ContextNodeProperty . }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||
" ?c rdf:type core:Position . " +
|
||||
" ?c core:positionForPerson ?f . ?f rdfs:label ?ContextNodeProperty . }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||
" ?c rdf:type core:Position . " +
|
||||
" ?c core:positionInOrganization ?i . ?i rdfs:label ?ContextNodeProperty . }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||
" ?c rdf:type core:Position . " +
|
||||
" ?c core:titleOrRole ?ContextNodeProperty . }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||
" ?c rdf:type core:Relationship . " +
|
||||
" ?c core:advisee ?d . ?d rdfs:label ?ContextNodeProperty . }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||
" ?c rdf:type core:Relationship . " +
|
||||
" ?c core:degreeCandidacy ?e . ?e rdfs:label ?ContextNodeProperty . }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||
" ?c rdf:type core:Relationship . " +
|
||||
" ?c core:linkedAuthor ?f . ?f rdfs:label ?ContextNodeProperty . }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||
" ?c rdf:type core:Relationship . " +
|
||||
" ?c core:linkedInformationResource ?h . ?h rdfs:label ?ContextNodeProperty . }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||
" ?c rdf:type core:AwardReceipt . " +
|
||||
" ?c core:awardConferredBy ?d . ?d rdfs:label ?ContextNodeProperty . }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||
" ?c rdf:type core:AwardReceipt . " +
|
||||
" ?c core:awardOrHonorFor ?e . ?e rdfs:label ?ContextNodeProperty . }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||
" ?c rdf:type core:AwardReceipt . " +
|
||||
" ?c core:description ?ContextNodeProperty . }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT DISTINCT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:Role ; core:roleIn ?Organization ."
|
||||
+ " ?Organization rdfs:label ?ContextNodeProperty . "
|
||||
+ " } ORDER BY ?ContextNodeProperty ");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:EducationalTraining . "
|
||||
+ " ?c core:degreeEarned ?d . ?d rdfs:label ?ContextNodeProperty ."
|
||||
+ " }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:EducationalTraining . "
|
||||
+ " ?c core:degreeEarned ?d . ?d core:abbreviation ?ContextNodeProperty ."
|
||||
+ " }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:EducationalTraining . "
|
||||
+ " ?c core:majorField ?ContextNodeProperty ."
|
||||
+ " }");
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:EducationalTraining . "
|
||||
+ " ?c core:departmentOrSchool ?ContextNodeProperty ."
|
||||
+ " }");
|
||||
|
||||
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:EducationalTraining . "
|
||||
+ " ?c core:trainingAtOrganization ?e . ?e rdfs:label ?ContextNodeProperty . "
|
||||
+ " }");
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
//single valued queries for core:InformationResource
|
||||
static {
|
||||
singleValuedQueriesForInformationResource.add(prefix +
|
||||
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||
+ " ?uri rdf:type core:InformationResource . "
|
||||
+ "?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ."
|
||||
+ "?b rdfs:label ?ContextNodeProperty .}");
|
||||
|
||||
singleValuedQueriesForInformationResource.add(prefix +
|
||||
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||
+ " ?uri rdf:type core:InformationResource . "
|
||||
+ " ?uri core:linkedInformationResource ?d ."
|
||||
+ " ?d rdfs:label ?ContextNodeProperty . }");
|
||||
|
||||
singleValuedQueriesForInformationResource.add(prefix +
|
||||
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||
+ "?uri rdf:type core:InformationResource . "
|
||||
+ "?uri core:features ?i . ?i rdfs:label ?ContextNodeProperty ."
|
||||
+ "}");
|
||||
|
||||
singleValuedQueriesForInformationResource.add(prefix +
|
||||
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||
+ "?uri rdf:type core:InformationResource . "
|
||||
+ "?uri bibo:editor ?e . ?e rdfs:label ?ContextNodeProperty ."
|
||||
+ "}");
|
||||
|
||||
singleValuedQueriesForInformationResource.add(prefix +
|
||||
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||
+ "?uri rdf:type core:InformationResource . "
|
||||
+ "?uri core:hasSubjectArea ?f . ?f rdfs:label ?ContextNodeProperty ."
|
||||
+ "}");
|
||||
|
||||
singleValuedQueriesForInformationResource.add(prefix +
|
||||
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||
+ "?uri rdf:type core:InformationResource . "
|
||||
+ "?uri core:hasSubjectArea ?f . ?f core:researchAreaOf ?h . ?h rdfs:label ?ContextNodeProperty ."
|
||||
+ "}");
|
||||
}
|
||||
|
||||
//multi valued queries
|
||||
|
||||
static{
|
||||
multiValuedQueriesForAgent.add(prefix +
|
||||
"SELECT " +
|
||||
"(str(?HRJobTitle) as ?hrJobTitle) (str(?InvolvedOrganizationName) as ?involvedOrganizationName) " +
|
||||
" (str(?PositionForPerson) as ?positionForPerson) (str(?PositionInOrganization) as ?positionInOrganization) " +
|
||||
" (str(?TitleOrRole) as ?titleOrRole) WHERE {"
|
||||
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:Position . "
|
||||
|
||||
+ " OPTIONAL { ?c core:hrJobTitle ?HRJobTitle . } . "
|
||||
+ " OPTIONAL { ?c core:involvedOrganizationName ?InvolvedOrganizationName . } ."
|
||||
+ " OPTIONAL { ?c core:positionForPerson ?f . ?f rdfs:label ?PositionForPerson . } . "
|
||||
+ " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . "
|
||||
+ " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . "
|
||||
+ " }");
|
||||
|
||||
multiValuedQueriesForAgent.add(prefix +
|
||||
"SELECT (str(?Advisee) as ?advisee) (str(?DegreeCandidacy) as ?degreeCandidacy) " +
|
||||
" (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) WHERE {"
|
||||
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:Relationship . "
|
||||
|
||||
+ " OPTIONAL { ?c core:advisee ?d . ?d rdfs:label ?Advisee . } . "
|
||||
+ " OPTIONAL { ?c core:degreeCandidacy ?e . ?e rdfs:label ?DegreeCandidacy . } ."
|
||||
+ " OPTIONAL { ?c core:linkedAuthor ?f . ?f rdfs:label ?LinkedAuthor . } . "
|
||||
+ " OPTIONAL { ?c core:linkedInformationResource ?h . ?h rdfs:label ?LinkedInformationResource . } . "
|
||||
|
||||
+ " } ");
|
||||
|
||||
multiValuedQueriesForAgent.add(prefix +
|
||||
"SELECT (str(?AwardConferredBy) as ?awardConferredBy) (str(?AwardOrHonorFor) as ?awardOrHonorFor) " +
|
||||
" (str(?Description) as ?description) WHERE {"
|
||||
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:AwardReceipt . "
|
||||
|
||||
+ " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy } . "
|
||||
+ " OPTIONAL { ?c core:awardOrHonorFor ?e . ?e rdfs:label ?AwardOrHonorFor } ."
|
||||
+ " OPTIONAL { ?c core:description ?Description . } . "
|
||||
+ " }");
|
||||
|
||||
multiValuedQueriesForAgent.add(prefix +
|
||||
"SELECT (str(?OrganizationLabel) as ?organizationLabel) WHERE {"
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:Role ; core:roleIn ?Organization ."
|
||||
+ " ?Organization rdfs:label ?OrganizationLabel . "
|
||||
+ " }");
|
||||
|
||||
multiValuedQueriesForAgent.add(prefix +
|
||||
"SELECT (str(?AcademicDegreeLabel) as ?academicDegreeLabel) (str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) "
|
||||
+ "(str(?MajorField) as ?majorField) (str(?DepartmentOrSchool) as ?departmentOrSchool) " +
|
||||
"(str(?TrainingAtOrganizationLabel) as ?trainingAtOrganizationLabel) WHERE {"
|
||||
|
||||
+ " ?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:EducationalTraining . "
|
||||
|
||||
+ "OPTIONAL { ?c core:degreeEarned ?d . ?d rdfs:label ?AcademicDegreeLabel ; core:abbreviation ?AcademicDegreeAbbreviation . } . "
|
||||
+ "OPTIONAL { ?c core:majorField ?MajorField .} ."
|
||||
+ " OPTIONAL { ?c core:departmentOrSchool ?DepartmentOrSchool . }"
|
||||
+ " OPTIONAL { ?c core:trainingAtOrganization ?e . ?e rdfs:label ?TrainingAtOrganizationLabel . } . "
|
||||
|
||||
+"}");
|
||||
|
||||
}
|
||||
|
||||
//multivalued query for core:InformationResource
|
||||
static {
|
||||
|
||||
multiValuedQueryForInformationResource = prefix +
|
||||
"SELECT (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) "
|
||||
+ "(str(?Editor) as ?editor) (str(?SubjectArea) as ?subjectArea) (str(?ResearchAreaOf) as ?researchAreaOf) " +
|
||||
"(str(?Features) as ?features) WHERE {"
|
||||
|
||||
+ " ?uri rdf:type core:InformationResource . "
|
||||
|
||||
+ "OPTIONAL { ?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ; core:linkedInformationResource ?d ." +
|
||||
"?b rdfs:label ?LinkedAuthor . ?d rdfs:label ?LinkedInformationResource } . "
|
||||
+ "OPTIONAL { ?uri bibo:editor ?e . ?e rdfs:label ?Editor . } ."
|
||||
+ " OPTIONAL { ?uri core:hasSubjectArea ?f . ?f rdfs:label ?SubjectArea ; core:researchAreaOf ?h . ?h rdfs:label ?ResearchAreaOf . } "
|
||||
+ " OPTIONAL { ?uri core:features ?i . ?i rdfs:label ?Features . } . "
|
||||
|
||||
+"}" ;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
private class QueryRunner extends Thread{
|
||||
|
||||
private Individual ind;
|
||||
private String query;
|
||||
private StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
public String getPropertyValues(){
|
||||
return propertyValues.toString();
|
||||
}
|
||||
|
||||
public QueryRunner(Individual ind, String query){
|
||||
this.ind = ind;
|
||||
this.query = query;
|
||||
}
|
||||
|
||||
|
||||
public void run(){
|
||||
// StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
propertyValues.append(runQuery(ind, query));
|
||||
|
||||
|
||||
// QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
// Resource uriResource = ResourceFactory.createResource(ind.getURI());
|
||||
// initialBinding.add("uri", uriResource);
|
||||
//
|
||||
// Query sparqlQuery = QueryFactory.create( query, Syntax.syntaxARQ);
|
||||
// dataset.getLock().enterCriticalSection(Lock.READ);
|
||||
// try{
|
||||
// QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, dataset, initialBinding);
|
||||
// try{
|
||||
// ResultSet results = qExec.execSelect();
|
||||
// while(results.hasNext()){
|
||||
// QuerySolution soln = results.nextSolution();
|
||||
// Iterator<String> iter = soln.varNames() ;
|
||||
// while( iter.hasNext()){
|
||||
// String name = iter.next();
|
||||
// RDFNode node = soln.get( name );
|
||||
// if( node != null ){
|
||||
// propertyValues.append(" " + node.toString());
|
||||
// }else{
|
||||
// log.debug(name + " is null");
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }catch(Throwable t){
|
||||
// log.error(t,t);
|
||||
// } finally{
|
||||
// qExec.close();
|
||||
// }
|
||||
// }finally{
|
||||
// dataset.getLock().leaveCriticalSection();
|
||||
// }
|
||||
//
|
||||
//objectProperties.append(propertyValues.toString());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
|
||||
/**
|
||||
* This interface represents an object that can add to a SolrInputDocument.
|
||||
*/
|
||||
public interface DocumentModifier {
|
||||
public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri);
|
||||
|
||||
}
|
|
@ -2,46 +2,330 @@
|
|||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.joda.time.DateTime;
|
||||
|
||||
import com.hp.hpl.jena.vocabulary.OWL;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ClassProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
|
||||
public class IndividualToSolrDocument implements Obj2DocIface {
|
||||
public class IndividualToSolrDocument {
|
||||
|
||||
protected LuceneDocToSolrDoc luceneToSolr;
|
||||
protected Entity2LuceneDoc entityToLucene;
|
||||
public static final Log log = LogFactory.getLog(IndividualToSolrDocument.class.getName());
|
||||
|
||||
public IndividualToSolrDocument(Entity2LuceneDoc e2d){
|
||||
entityToLucene = e2d;
|
||||
luceneToSolr = new LuceneDocToSolrDoc();
|
||||
public static VitroTermNames term = new VitroTermNames();
|
||||
|
||||
private static String entClassName = Individual.class.getName();
|
||||
|
||||
private ClassProhibitedFromSearch classesProhibitedFromSearch;
|
||||
|
||||
private IndividualProhibitedFromSearch individualProhibitedFromSearch;
|
||||
|
||||
public List<DocumentModifier> documentModifiers = new ArrayList<DocumentModifier>();
|
||||
|
||||
private static List<String> contextNodeClassNames = new ArrayList<String>();
|
||||
|
||||
|
||||
|
||||
/**
 * Creates a translator that runs no document modifiers.
 *
 * @param classesProhibitedFromSearch decides which classes are kept out of text results
 * @param individualProhibitedFromSearch decides which individuals are not indexed at all
 */
public IndividualToSolrDocument(
        ClassProhibitedFromSearch classesProhibitedFromSearch,
        IndividualProhibitedFromSearch individualProhibitedFromSearch){

    // Typed empty list instead of the raw Collections.EMPTY_LIST (no unchecked conversion).
    this( classesProhibitedFromSearch,
          individualProhibitedFromSearch,
          Collections.<DocumentModifier>emptyList());
}
|
||||
|
||||
@Override
|
||||
public boolean canTranslate(Object obj) {
|
||||
return obj != null && obj instanceof Individual;
|
||||
/**
 * Creates a translator with the given prohibition checks and document modifiers.
 *
 * @param classesProhibitedFromSearch decides which classes are kept out of text results
 * @param individualProhibitedFromSearch decides which individuals are not indexed at all
 * @param docModifiers modifiers run on each translated document; stored as given
 */
public IndividualToSolrDocument(
        ClassProhibitedFromSearch classesProhibitedFromSearch,
        IndividualProhibitedFromSearch individualProhibitedFromSearch,
        List<DocumentModifier> docModifiers){
    fillContextNodes();
    this.documentModifiers = docModifiers;
    this.individualProhibitedFromSearch = individualProhibitedFromSearch;
    this.classesProhibitedFromSearch = classesProhibitedFromSearch;
}
|
||||
|
||||
@Override
|
||||
public boolean canUnTranslate(Object result) {
|
||||
return result != null && result instanceof SolrDocument;
|
||||
@SuppressWarnings("static-access")
|
||||
public SolrInputDocument translate(Individual ind) throws IndexingException{
|
||||
long tProhibited = System.currentTimeMillis();
|
||||
ArrayList<String> superClassNames = null;
|
||||
StringBuffer addUri = null;
|
||||
String value;
|
||||
StringBuffer classPublicNames = new StringBuffer();
|
||||
classPublicNames.append("");
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
|
||||
//DocId
|
||||
String id = ind.getURI();
|
||||
log.debug("translating " + id);
|
||||
|
||||
if(id == null){
|
||||
log.debug("cannot add individuals without URIs to lucene Index");
|
||||
return null;
|
||||
}else if( id.startsWith(VitroVocabulary.vitroURI) ||
|
||||
id.startsWith(VitroVocabulary.VITRO_PUBLIC) ||
|
||||
id.startsWith(VitroVocabulary.PSEUDO_BNODE_NS) ||
|
||||
id.startsWith(OWL.NS)){
|
||||
log.debug("not indexing because of namespace:" + id);
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
//filter out class groups, owl:ObjectProperties etc..
|
||||
if(individualProhibitedFromSearch.isIndividualProhibited(id)){
|
||||
return null;
|
||||
}
|
||||
|
||||
log.debug("time to check if individual is prohibited:" + Long.toString(System.currentTimeMillis() - tProhibited));
|
||||
|
||||
// Types and classgroups
|
||||
boolean prohibited = false;
|
||||
List<VClass> vclasses = ind.getVClasses(false);
|
||||
superClassNames = new ArrayList<String>();
|
||||
String superLclName = null;
|
||||
long tClassgroup = System.currentTimeMillis();
|
||||
for(VClass clz : vclasses){
|
||||
superLclName = clz.getLocalName();
|
||||
superClassNames.add(superLclName);
|
||||
if(clz.getURI() == null){
|
||||
continue;
|
||||
}else if(OWL.Thing.getURI().equals(clz.getURI())){
|
||||
//index individuals of type owl:Thing, just don't add owl:Thing as the type field in the index
|
||||
continue;
|
||||
} else if(clz.getURI().startsWith(OWL.NS)){
|
||||
log.debug("not indexing " + id + " because of type " + clz.getURI());
|
||||
return null;
|
||||
} else if(contextNodeClassNames.contains(superLclName)) { // check to see if context node is being indexed.
|
||||
return null;
|
||||
}
|
||||
else {
|
||||
if( !prohibited && classesProhibitedFromSearch.isClassProhibitedFromSearch(clz.getURI()))
|
||||
prohibited = true;
|
||||
if( clz.getSearchBoost() != null)
|
||||
doc.setDocumentBoost(doc.getDocumentBoost() + clz.getSearchBoost());
|
||||
|
||||
doc.addField(term.RDFTYPE, clz.getURI());
|
||||
|
||||
if(clz.getLocalName() != null){
|
||||
doc.addField(term.CLASSLOCALNAME, clz.getLocalName());
|
||||
doc.addField(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase());
|
||||
}
|
||||
|
||||
if(clz.getName() != null){
|
||||
classPublicNames.append(" ");
|
||||
classPublicNames.append(clz.getName());
|
||||
}
|
||||
|
||||
//Classgroup URI
|
||||
if(clz.getGroupURI() != null){
|
||||
doc.addField(term.CLASSGROUP_URI,clz.getGroupURI());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if(superClassNames.isEmpty()){
|
||||
return null;
|
||||
}
|
||||
|
||||
log.debug("time to check if class is prohibited and adding classes, classgroups and type to the index: " + Long.toString(System.currentTimeMillis() - tClassgroup));
|
||||
|
||||
|
||||
doc.addField(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0");
|
||||
|
||||
//lucene DocID
|
||||
doc.addField(term.DOCID, entClassName + id);
|
||||
|
||||
//vitro id
|
||||
doc.addField(term.URI, id);
|
||||
|
||||
//java class
|
||||
doc.addField(term.JCLASS, entClassName);
|
||||
|
||||
//Individual Label
|
||||
if(ind.getRdfsLabel() != null)
|
||||
value = ind.getRdfsLabel();
|
||||
else{
|
||||
log.debug("Using local name for individual with rdfs:label " + ind.getURI());
|
||||
value = ind.getLocalName();
|
||||
}
|
||||
|
||||
// collecting object property statements
|
||||
|
||||
String uri = ind.getURI();
|
||||
StringBuffer objectNames = new StringBuffer();
|
||||
objectNames.append("");
|
||||
String t=null;
|
||||
addUri = new StringBuffer();
|
||||
addUri.append("");
|
||||
List<ObjectPropertyStatement> objectPropertyStatements = ind.getObjectPropertyStatements();
|
||||
if (objectPropertyStatements != null) {
|
||||
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
|
||||
while (objectPropertyStmtIter.hasNext()) {
|
||||
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
|
||||
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
|
||||
continue;
|
||||
try {
|
||||
objectNames.append(" ");
|
||||
objectNames.append(((t=objectPropertyStmt.getObject().getName()) == null)?"":t);
|
||||
addUri.append(" ");
|
||||
addUri.append(((t=objectPropertyStmt.getObject().getURI()) == null)?"":t);
|
||||
} catch (Exception e) {
|
||||
log.debug("could not index name of related object: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(documentModifiers == null || documentModifiers.isEmpty()){
|
||||
doc.addField(term.NAME_RAW, value, NAME_BOOST);
|
||||
doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST);
|
||||
doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST);
|
||||
doc.addField(term.NAME_STEMMED, value, NAME_BOOST);
|
||||
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
|
||||
}else{
|
||||
doc.addField(term.NAME_RAW, value);
|
||||
doc.addField(term.NAME_LOWERCASE, value.toLowerCase());
|
||||
doc.addField(term.NAME_UNSTEMMED, value);
|
||||
doc.addField(term.NAME_STEMMED, value);
|
||||
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
|
||||
}
|
||||
|
||||
|
||||
long tMoniker = System.currentTimeMillis();
|
||||
|
||||
if(documentModifiers == null || documentModifiers.isEmpty()){
|
||||
//boost for entity
|
||||
if(ind.getSearchBoost() != null && ind.getSearchBoost() != 0)
|
||||
doc.setDocumentBoost(ind.getSearchBoost());
|
||||
}
|
||||
|
||||
//thumbnail
|
||||
try{
|
||||
value = null;
|
||||
if(ind.hasThumb())
|
||||
doc.addField(term.THUMBNAIL, "1");
|
||||
else
|
||||
doc.addField(term.THUMBNAIL, "0");
|
||||
}catch(Exception ex){
|
||||
log.debug("could not index thumbnail: " + ex);
|
||||
}
|
||||
|
||||
|
||||
//time of index in millis past epoc
|
||||
Object anon[] = { new Long((new DateTime() ).getMillis()) };
|
||||
doc.addField(term.INDEXEDTIME, String.format("%019d", anon));
|
||||
|
||||
log.debug("time to include thumbnail and indexedtime in the index: " + Long.toString(System.currentTimeMillis() - tMoniker));
|
||||
|
||||
long tPropertyStatements = System.currentTimeMillis();
|
||||
|
||||
//collecting data property statements
|
||||
|
||||
if(!prohibited){
|
||||
//ALLTEXT, all of the 'full text'
|
||||
StringBuffer allTextValue = new StringBuffer();
|
||||
allTextValue.append("");
|
||||
allTextValue.append(" ");
|
||||
allTextValue.append(((t=ind.getName()) == null)?"":t);
|
||||
allTextValue.append(" ");
|
||||
allTextValue.append(((t=ind.getAnchor()) == null)?"":t);
|
||||
allTextValue.append(" ");
|
||||
allTextValue.append(classPublicNames.toString());
|
||||
|
||||
List<DataPropertyStatement> dataPropertyStatements = ind.getDataPropertyStatements();
|
||||
if (dataPropertyStatements != null) {
|
||||
Iterator<DataPropertyStatement> dataPropertyStmtIter = dataPropertyStatements.iterator();
|
||||
while (dataPropertyStmtIter.hasNext()) {
|
||||
DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next();
|
||||
allTextValue.append(" ");
|
||||
allTextValue.append(((t=dataPropertyStmt.getData()) == null)?"":t);
|
||||
}
|
||||
}
|
||||
|
||||
allTextValue.append(objectNames.toString());
|
||||
|
||||
log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements));
|
||||
|
||||
String alltext = allTextValue.toString();
|
||||
doc.addField(term.ALLTEXT, alltext);
|
||||
doc.addField(term.ALLTEXTUNSTEMMED, alltext);
|
||||
doc.addField(term.ALLTEXT_PHONETIC, alltext,PHONETIC_BOOST);
|
||||
|
||||
//run the document modifiers
|
||||
if( documentModifiers != null && !documentModifiers.isEmpty()){
|
||||
doc.addField(term.targetInfo,"");
|
||||
for(DocumentModifier modifier: documentModifiers){
|
||||
modifier.modifyDocument(ind, doc, addUri);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
||||
public Object getIndexId(Object obj) {
|
||||
throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object translate(Object obj) throws IndexingException {
|
||||
return luceneToSolr.translate( entityToLucene.translate( obj ) );
|
||||
public Individual unTranslate(Object result) {
|
||||
Individual ent = null;
|
||||
if( result != null && result instanceof Document){
|
||||
Document hit = (Document) result;
|
||||
String id = hit.get(term.URI);
|
||||
ent = new IndividualImpl();
|
||||
ent.setURI(id);
|
||||
}
|
||||
return ent;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object unTranslate(Object result) {
|
||||
return luceneToSolr.unTranslate( result );
|
||||
private void fillContextNodes(){
|
||||
this.contextNodeClassNames.add("Role");
|
||||
this.contextNodeClassNames.add("AttendeeRole");
|
||||
this.contextNodeClassNames.add("ClinicalRole");
|
||||
this.contextNodeClassNames.add("LeaderRole");
|
||||
this.contextNodeClassNames.add("MemberRole");
|
||||
this.contextNodeClassNames.add("OutreachProviderRole");
|
||||
this.contextNodeClassNames.add("PresenterRole");
|
||||
this.contextNodeClassNames.add("ResearcherRole");
|
||||
this.contextNodeClassNames.add("InvestigatorRole");
|
||||
this.contextNodeClassNames.add("CoPrincipalInvestigatorRole");
|
||||
this.contextNodeClassNames.add("PrincipalInvestigatorRole");
|
||||
this.contextNodeClassNames.add("ServiceProviderRole");
|
||||
this.contextNodeClassNames.add("TeacherRole");
|
||||
this.contextNodeClassNames.add("Position");
|
||||
this.contextNodeClassNames.add("FacultyAdministrativePosition");
|
||||
this.contextNodeClassNames.add("FacultyPosition");
|
||||
this.contextNodeClassNames.add("LibrarianPosition");
|
||||
this.contextNodeClassNames.add("Non-AcademicPosition");
|
||||
this.contextNodeClassNames.add("Non-FacultyAcademicPosition");
|
||||
this.contextNodeClassNames.add("PostdoctoralPosition");
|
||||
this.contextNodeClassNames.add("AdvisingRelationship");
|
||||
this.contextNodeClassNames.add("Authorship");
|
||||
this.contextNodeClassNames.add("AcademicDegree");
|
||||
}
|
||||
|
||||
|
||||
public static float NAME_BOOST = 2.0F;
|
||||
public static float ALL_TEXT_BOOST = 2.5F;
|
||||
public static float PHONETIC_BOOST = 0.1F;
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -1,62 +0,0 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
||||
|
||||
/**
|
||||
* Translate a lucene Document into a SolrDocument.
|
||||
*/
|
||||
public class LuceneDocToSolrDoc implements Obj2DocIface {
|
||||
|
||||
|
||||
@Override
|
||||
public boolean canTranslate(Object obj) {
|
||||
return obj != null && obj instanceof Document;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canUnTranslate(Object result) {
|
||||
return result != null && result instanceof SolrDocument;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getIndexId(Object obj) {
|
||||
//"this method isn't useful for solr"
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object translate(Object obj) throws IndexingException {
|
||||
Document luceneDoc = (Document)obj;
|
||||
SolrInputDocument solrDoc = new SolrInputDocument();
|
||||
|
||||
for( Object f : luceneDoc.getFields()){
|
||||
Field field = (Field)f;
|
||||
solrDoc.addField( new String(field.name()), field.stringValue() );
|
||||
}
|
||||
return solrDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object unTranslate(Object result) {
|
||||
Individual ind = null;
|
||||
if( result != null && result instanceof SolrDocument){
|
||||
SolrDocument hit = (SolrDocument)result;
|
||||
String id = (String) hit.getFieldValue(Entity2LuceneDoc.term.URI);
|
||||
ind = new IndividualImpl();
|
||||
ind.setURI(id);
|
||||
}
|
||||
return ind;
|
||||
}
|
||||
|
||||
}
|
|
@ -6,37 +6,36 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.solr.client.solrj.SolrServer;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.client.solrj.response.UpdateResponse;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.solr.CalculateParameters;
|
||||
|
||||
public class SolrIndexer implements IndexerIface {
|
||||
private final static Log log = LogFactory.getLog(SolrIndexer.class);
|
||||
|
||||
protected SolrServer server;
|
||||
protected boolean indexing;
|
||||
protected List<Obj2DocIface> obj2DocList;
|
||||
protected HashSet<String> urisIndexed;
|
||||
protected IndividualToSolrDocument individualToSolrDoc;
|
||||
|
||||
public SolrIndexer( SolrServer server, List<Obj2DocIface> o2d){
|
||||
public SolrIndexer( SolrServer server, IndividualToSolrDocument indToDoc){
|
||||
this.server = server;
|
||||
this.obj2DocList = o2d;
|
||||
this.individualToSolrDoc = indToDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void index(Individual ind, boolean newDoc) throws IndexingException {
|
||||
|
||||
public void index(Individual ind) throws IndexingException {
|
||||
if( ! indexing )
|
||||
throw new IndexingException("SolrIndexer: must call " +
|
||||
"startIndexing() before index().");
|
||||
|
@ -49,38 +48,31 @@ public class SolrIndexer implements IndexerIface {
|
|||
log.debug("already indexed " + ind.getURI() );
|
||||
return;
|
||||
}else{
|
||||
SolrInputDocument solrDoc = null;
|
||||
synchronized(this){
|
||||
urisIndexed.add(ind.getURI());
|
||||
}
|
||||
log.debug("indexing " + ind.getURI());
|
||||
Iterator<Obj2DocIface> it = getObj2DocList().iterator();
|
||||
while (it.hasNext()) {
|
||||
Obj2DocIface obj2doc = (Obj2DocIface) it.next();
|
||||
if (obj2doc.canTranslate(ind)) {
|
||||
SolrInputDocument solrDoc = (SolrInputDocument) obj2doc.translate(ind);
|
||||
// synchronized(individualToSolrDoc){
|
||||
solrDoc = individualToSolrDoc.translate(ind);
|
||||
// }
|
||||
if( solrDoc != null){
|
||||
//sending each doc individually is inefficient
|
||||
Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
|
||||
docs.add( solrDoc );
|
||||
server.add( docs );
|
||||
// if( !newDoc ){
|
||||
// server.add( docs );
|
||||
// log.debug("updated " + ind.getName() + " " + ind.getURI());
|
||||
// }else{
|
||||
// server.add( docs );
|
||||
// log.debug("added " + ind.getName() + " " + ind.getURI());
|
||||
// }
|
||||
// Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
|
||||
// docs.add( solrDoc );
|
||||
UpdateResponse res = server.add( solrDoc );
|
||||
log.debug("response after adding docs to server: "+ res);
|
||||
}else{
|
||||
log.debug("removing from index " + ind.getURI());
|
||||
//TODO: how do we delete document?
|
||||
//writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException ex) {
|
||||
throw new IndexingException(ex.getMessage());
|
||||
} catch (SolrServerException ex) {
|
||||
throw new IndexingException(ex.getMessage());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -115,12 +107,12 @@ public class SolrIndexer implements IndexerIface {
|
|||
|
||||
|
||||
public synchronized void addObj2Doc(Obj2DocIface o2d) {
|
||||
if (o2d != null)
|
||||
obj2DocList.add(o2d);
|
||||
//no longer used
|
||||
}
|
||||
|
||||
public synchronized List<Obj2DocIface> getObj2DocList() {
|
||||
return obj2DocList;
|
||||
//no longer used
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -131,11 +123,21 @@ public class SolrIndexer implements IndexerIface {
|
|||
@Override
|
||||
public synchronized void endIndexing() {
|
||||
try {
|
||||
server.commit();
|
||||
} catch (Exception e) {
|
||||
UpdateResponse res = server.commit();
|
||||
log.debug("Response after committing to server: "+ res );
|
||||
} catch (SolrServerException e) {
|
||||
log.error("Could not commit to solr server", e);
|
||||
} catch(IOException e){
|
||||
log.error("Could not commit to solr server", e);
|
||||
}finally{
|
||||
if(!individualToSolrDoc.documentModifiers.isEmpty()){
|
||||
if(individualToSolrDoc.documentModifiers.get(0) instanceof CalculateParameters){
|
||||
CalculateParameters c = (CalculateParameters) individualToSolrDoc.documentModifiers.get(0);
|
||||
c.clearMap();
|
||||
log.info("BetaMap cleared");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
server.optimize();
|
||||
} catch (Exception e) {
|
||||
|
|
|
@ -14,6 +14,7 @@ import org.apache.solr.client.solrj.SolrServer;
|
|||
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
|
||||
|
||||
import com.hp.hpl.jena.ontology.OntModel;
|
||||
import com.hp.hpl.jena.query.Dataset;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
|
||||
|
@ -21,14 +22,14 @@ import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
|
|||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.JenaBaseDao;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.WebappDaoFactoryJena;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
|
||||
import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
|
||||
|
||||
|
@ -57,8 +58,8 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
|
|||
}
|
||||
CommonsHttpSolrServer server;
|
||||
server = new CommonsHttpSolrServer( solrServerUrl );
|
||||
server.setSoTimeout(1000); // socket read timeout
|
||||
server.setConnectionTimeout(100);
|
||||
server.setSoTimeout(10000); // socket read timeout
|
||||
server.setConnectionTimeout(10000);
|
||||
server.setDefaultMaxConnectionsPerHost(100);
|
||||
server.setMaxTotalConnections(100);
|
||||
server.setMaxRetries(1);
|
||||
|
@ -67,15 +68,24 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
|
|||
/* setup the individual to solr doc translation */
|
||||
//first we need a ent2luceneDoc translator
|
||||
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
||||
Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc(
|
||||
|
||||
OntModel abox = ModelContext.getBaseOntModelSelector(context).getABoxModel();
|
||||
|
||||
OntModel inferences = (OntModel)context.getAttribute( JenaBaseDao.INFERENCE_ONT_MODEL_ATTRIBUTE_NAME);
|
||||
Dataset dataset = WebappDaoFactoryJena.makeInMemoryDataset(abox, inferences);
|
||||
|
||||
List<DocumentModifier> modifiers = new ArrayList<DocumentModifier>();
|
||||
// modifiers.add(new CalculateParameters(ModelContext.getJenaOntModel(context)));
|
||||
modifiers.add(new CalculateParameters(dataset));
|
||||
modifiers.add(new ContextNodeFields(dataset));
|
||||
|
||||
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument(
|
||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||
new IndividualProhibitedFromSearch(context) );
|
||||
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc );
|
||||
List<Obj2DocIface> o2d = new ArrayList<Obj2DocIface>();
|
||||
o2d.add(indToSolrDoc);
|
||||
new IndividualProhibitedFromSearchImpl(context),
|
||||
modifiers);
|
||||
|
||||
/* setup solr indexer */
|
||||
SolrIndexer solrIndexer = new SolrIndexer(server, o2d);
|
||||
SolrIndexer solrIndexer = new SolrIndexer(server, indToSolrDoc);
|
||||
if( solrIndexer.isIndexEmpty() ){
|
||||
log.info("solr index is empty, requesting rebuild");
|
||||
sce.getServletContext().setAttribute(LuceneSetup.INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE);
|
||||
|
|
|
@ -41,8 +41,8 @@ public class ProhibitedFromSearchTest {
|
|||
ProhibitedFromSearch pfs = new ProhibitedFromSearch( SEARCH_CONFIG_URI , m);
|
||||
Assert.assertNotNull(pfs.prohibitedClasses);
|
||||
Assert.assertTrue(pfs.prohibitedClasses.size() == 4);
|
||||
Assert.assertTrue(pfs.isClassProhibited(TEST_CLASS));
|
||||
Assert.assertTrue(!pfs.isClassProhibited("http://someOtherClass.com/test"));
|
||||
Assert.assertTrue(pfs.isClassProhibitedFromSearch(TEST_CLASS));
|
||||
Assert.assertTrue(!pfs.isClassProhibitedFromSearch("http://someOtherClass.com/test"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -55,8 +55,8 @@ public class ProhibitedFromSearchTest {
|
|||
ProhibitedFromSearch pfs = new ProhibitedFromSearch( "http://NotFound.com/inModel", m);
|
||||
Assert.assertNotNull(pfs.prohibitedClasses);
|
||||
Assert.assertTrue(pfs.prohibitedClasses.size() == 0);
|
||||
Assert.assertTrue(!pfs.isClassProhibited(TEST_CLASS));
|
||||
Assert.assertTrue(!pfs.isClassProhibited("http://someOtherClass.com/test"));
|
||||
Assert.assertTrue(!pfs.isClassProhibitedFromSearch(TEST_CLASS));
|
||||
Assert.assertTrue(!pfs.isClassProhibitedFromSearch("http://someOtherClass.com/test"));
|
||||
}
|
||||
|
||||
|
||||
|
@ -122,7 +122,7 @@ public class ProhibitedFromSearchTest {
|
|||
ProhibitedFromSearch pfs = new ProhibitedFromSearch( DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, m);
|
||||
Assert.assertNotNull(pfs.prohibitedClasses);
|
||||
Assert.assertEquals(1, pfs.prohibitedClasses.size() );
|
||||
Assert.assertTrue(pfs.isClassProhibited("http://vivoweb.org/ontology/core#NonAcademic"));
|
||||
Assert.assertTrue(!pfs.isClassProhibited("http://someOtherClass.com/test"));
|
||||
Assert.assertTrue(pfs.isClassProhibitedFromSearch("http://vivoweb.org/ontology/core#NonAcademic"));
|
||||
Assert.assertTrue(!pfs.isClassProhibitedFromSearch("http://someOtherClass.com/test"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue