Committing changes with merge from search relevance branch.
This commit is contained in:
commit
692446335e
31 changed files with 48715 additions and 2514 deletions
|
@ -222,9 +222,9 @@
|
||||||
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||||
<analyzer type="index">
|
<analyzer type="index">
|
||||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
<!-- in this example, we will only use synonyms at query time
|
<!-- synonyms from syn.txt are applied at index time here, as well as at query time -->
|
||||||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
<filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="true"/>
|
||||||
-->
|
|
||||||
<!-- Case insensitive stop word removal.
|
<!-- Case insensitive stop word removal.
|
||||||
add enablePositionIncrements=true in both the index and query
|
add enablePositionIncrements=true in both the index and query
|
||||||
analyzers to leave a 'gap' for more accurate phrase queries.
|
analyzers to leave a 'gap' for more accurate phrase queries.
|
||||||
|
@ -237,12 +237,12 @@
|
||||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
<filter class="solr.LowerCaseFilterFactory"/>
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||||
<!-- <filter class="solr.PorterStemFilterFactory"/> -->
|
<filter class="solr.PorterStemFilterFactory"/>
|
||||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||||
</analyzer>
|
</analyzer>
|
||||||
<analyzer type="query">
|
<analyzer type="query">
|
||||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
<filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="true"/>
|
||||||
<filter class="solr.StopFilterFactory"
|
<filter class="solr.StopFilterFactory"
|
||||||
ignoreCase="true"
|
ignoreCase="true"
|
||||||
words="stopwords.txt"
|
words="stopwords.txt"
|
||||||
|
@ -251,6 +251,7 @@
|
||||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
<filter class="solr.LowerCaseFilterFactory"/>
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||||
|
<filter class="solr.PorterStemFilterFactory"/>
|
||||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||||
</analyzer>
|
</analyzer>
|
||||||
</fieldType>
|
</fieldType>
|
||||||
|
@ -381,7 +382,7 @@
|
||||||
<fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
<fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
||||||
<analyzer>
|
<analyzer>
|
||||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||||
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
|
<filter class="solr.PhoneticFilterFactory" encoder="Metaphone" inject="false"/>
|
||||||
</analyzer>
|
</analyzer>
|
||||||
</fieldtype>
|
</fieldtype>
|
||||||
|
|
||||||
|
@ -470,39 +471,53 @@
|
||||||
when adding a document.
|
when adding a document.
|
||||||
-->
|
-->
|
||||||
|
|
||||||
<!-- **************************** Vitro Fields *************************** -->
|
<!-- **************************** Vitro Fields *************************** -->
|
||||||
|
|
||||||
|
<field name="DocId" type="string" indexed="true" stored="true" required="true" omitNorms="true"/>
|
||||||
|
|
||||||
|
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
||||||
|
|
||||||
|
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
|
||||||
|
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" omitNorms="true" multiValued="true"/>
|
||||||
|
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
||||||
|
|
||||||
|
<field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/>
|
||||||
|
<!-- RY Not sure if we need to store nameLowercase -->
|
||||||
|
<field name="nameLowercase" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<!-- A sortable version of nameLowercase -->
|
||||||
|
<field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" />
|
||||||
|
<field name="nameUnstemmed" type="lowercase" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
|
||||||
<field name="DocId" type="string" indexed="true" stored="true" required="true" />
|
|
||||||
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
|
||||||
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
|
|
||||||
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
|
|
||||||
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
|
|
||||||
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
|
|
||||||
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
|
||||||
<field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/>
|
|
||||||
<!-- RY Not sure if we need to store nameLowercase. Is it ever displayed? -->
|
|
||||||
<field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/>
|
|
||||||
<!-- A sortable version of nameLowercase -->
|
|
||||||
<field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" />
|
|
||||||
<field name="nameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>
|
|
||||||
<field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
|
||||||
<!--
|
<!--
|
||||||
<field name="prefixNameUnstemmed" type="prefixTextUnstemmed" indexed="true" stored="false" multiValued="true"/>
|
<field name="prefixNameUnstemmed" type="prefixTextUnstemmed" indexed="true" stored="false" multiValued="true"/>
|
||||||
<field name="prefixNameStemmed" type="prefixText" indexed="true" stored="false" multiValued="true"/>
|
<field name="prefixNameStemmed" type="prefixText" indexed="true" stored="false" multiValued="true"/>
|
||||||
-->
|
-->
|
||||||
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
|
|
||||||
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
|
|
||||||
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
|
||||||
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
|
|
||||||
|
|
||||||
<field name="moniker" type="ignored" />
|
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
<field name="modType" type="ignored"/>
|
<field name="NAME_PHONETIC" type ="phonetic" indexed="true" stored="false" multiValued="true"/>
|
||||||
<field name="JCLASS" type="ignored"/>
|
|
||||||
|
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="ALLTEXT_PHONETIC" type="phonetic" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
|
||||||
|
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
<field name="targetInfo" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<!-- <field name="BETA" type="float" indexed="true" stored="true" multiValued="false"/>
|
||||||
|
<field name="PHI" type="float" indexed="true" stored="true" multiValued="false"/>
|
||||||
|
<field name="ADJACENT_NODES" type="string" indexed="true" stored="true" multiValued="true"/> -->
|
||||||
|
|
||||||
|
<field name="moniker" type="ignored" />
|
||||||
|
<field name="modType" type="ignored"/>
|
||||||
|
<field name="JCLASS" type="ignored"/>
|
||||||
|
|
||||||
<!-- Copy nameLowercase to sortable field. -->
|
<!-- Copy nameLowercase to sortable field. -->
|
||||||
<copyField source="nameLowercase" dest="nameLowercaseSingleValued" />
|
<copyField source="nameLowercase" dest="nameLowercaseSingleValued" />
|
||||||
|
<!-- **************************** End Vitro Fields *************************** -->
|
||||||
<!-- **************************** End Vitro Fields *************************** -->
|
|
||||||
|
|
||||||
|
|
||||||
<!-- catchall field, containing all other searchable text fields (implemented
|
<!-- catchall field, containing all other searchable text fields (implemented
|
||||||
|
@ -571,7 +586,7 @@
|
||||||
<uniqueKey>DocId</uniqueKey>
|
<uniqueKey>DocId</uniqueKey>
|
||||||
|
|
||||||
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
|
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
|
||||||
<defaultSearchField>ALLTEXT</defaultSearchField>
|
<!-- <defaultSearchField>ALLTEXT</defaultSearchField> -->
|
||||||
|
|
||||||
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
||||||
<solrQueryParser defaultOperator="AND"/>
|
<solrQueryParser defaultOperator="AND"/>
|
||||||
|
@ -594,7 +609,8 @@
|
||||||
<!-- Similarity is the scoring routine for each document vs. a query.
|
<!-- Similarity is the scoring routine for each document vs. a query.
|
||||||
A custom similarity may be specified here, but the default is fine
|
A custom similarity may be specified here, but the default is fine
|
||||||
for most applications. -->
|
for most applications. -->
|
||||||
<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
|
<similarity class="org.apache.lucene.search.DefaultSimilarity"/>
|
||||||
|
<!-- <similarity class="edu.cornell.mannlib.vitro.webapp.search.lucene.CustomSimilarity"/> -->
|
||||||
<!-- ... OR ...
|
<!-- ... OR ...
|
||||||
Specify a SimilarityFactory class name implementation
|
Specify a SimilarityFactory class name implementation
|
||||||
allowing parameters to be used.
|
allowing parameters to be used.
|
||||||
|
|
|
@ -385,6 +385,7 @@
|
||||||
be based on the last SolrCore to be initialized.
|
be based on the last SolrCore to be initialized.
|
||||||
|
|
||||||
-->
|
-->
|
||||||
|
-->
|
||||||
<!-- Increasing to handle large wildcard queries used in IndividualListController.
|
<!-- Increasing to handle large wildcard queries used in IndividualListController.
|
||||||
See VIVO-384. -->
|
See VIVO-384. -->
|
||||||
<maxBooleanClauses>50000</maxBooleanClauses>
|
<maxBooleanClauses>50000</maxBooleanClauses>
|
||||||
|
@ -706,8 +707,15 @@
|
||||||
will be overridden by parameters in the request
|
will be overridden by parameters in the request
|
||||||
-->
|
-->
|
||||||
<lst name="defaults">
|
<lst name="defaults">
|
||||||
|
<str name="defType">edismax</str>
|
||||||
|
<str name="qf">nameRaw nameLowercase ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC nameUnstemmed nameStemmed targetInfo</str>
|
||||||
|
<str name="pf">targetInfo</str>
|
||||||
<str name="echoParams">explicit</str>
|
<str name="echoParams">explicit</str>
|
||||||
|
<str name="ps">2</str>
|
||||||
|
<str name="qs">2</str>
|
||||||
<int name="rows">10</int>
|
<int name="rows">10</int>
|
||||||
|
<str name="q.alt">*:*</str>
|
||||||
|
<str name="fl">*,score</str>
|
||||||
</lst>
|
</lst>
|
||||||
<!-- In addition to defaults, "appends" params can be specified
|
<!-- In addition to defaults, "appends" params can be specified
|
||||||
to identify values which should be appended to the list of
|
to identify values which should be appended to the list of
|
||||||
|
@ -783,10 +791,12 @@
|
||||||
<str name="v.layout">layout</str>
|
<str name="v.layout">layout</str>
|
||||||
<str name="title">Solritas</str>
|
<str name="title">Solritas</str>
|
||||||
|
|
||||||
<str name="defType">edismax</str>
|
<!-- <str name="defType">edismax</str> -->
|
||||||
|
|
||||||
<str name="q.alt">*:*</str>
|
<str name="q.alt">*:*</str>
|
||||||
<str name="rows">10</str>
|
<str name="rows">10</str>
|
||||||
<str name="fl">*,score</str>
|
<str name="fl">*,score</str>
|
||||||
|
|
||||||
<str name="mlt.qf">
|
<str name="mlt.qf">
|
||||||
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
|
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
|
||||||
</str>
|
</str>
|
||||||
|
@ -1395,8 +1405,7 @@
|
||||||
</fragmentsBuilder>
|
</fragmentsBuilder>
|
||||||
</highlighting>
|
</highlighting>
|
||||||
</searchComponent>
|
</searchComponent>
|
||||||
|
<!-- Autocomplete -->
|
||||||
<!-- Autocomplete -->
|
|
||||||
<!--
|
<!--
|
||||||
<searchComponent class="solr.SpellCheckComponent" name="suggest">
|
<searchComponent class="solr.SpellCheckComponent" name="suggest">
|
||||||
<lst name="spellchecker">
|
<lst name="spellchecker">
|
||||||
|
@ -1435,7 +1444,6 @@
|
||||||
</arr>
|
</arr>
|
||||||
</requestHandler>
|
</requestHandler>
|
||||||
-->
|
-->
|
||||||
|
|
||||||
<!-- Update Processors
|
<!-- Update Processors
|
||||||
|
|
||||||
Chains of Update Processor Factories for dealing with Update
|
Chains of Update Processor Factories for dealing with Update
|
||||||
|
|
|
@ -56,3 +56,122 @@ was
|
||||||
will
|
will
|
||||||
with
|
with
|
||||||
|
|
||||||
|
# these stopwords are taken
|
||||||
|
# from http://www.onjava.com/pub/a/onjava/2003/01/15/lucene.html?page=2
|
||||||
|
|
||||||
|
about
|
||||||
|
after
|
||||||
|
all
|
||||||
|
also
|
||||||
|
an
|
||||||
|
and
|
||||||
|
another
|
||||||
|
any
|
||||||
|
are
|
||||||
|
as
|
||||||
|
at
|
||||||
|
be
|
||||||
|
because
|
||||||
|
been
|
||||||
|
before
|
||||||
|
being
|
||||||
|
between
|
||||||
|
both
|
||||||
|
but
|
||||||
|
by
|
||||||
|
came
|
||||||
|
can
|
||||||
|
come
|
||||||
|
could
|
||||||
|
did
|
||||||
|
do
|
||||||
|
does
|
||||||
|
each
|
||||||
|
else
|
||||||
|
for
|
||||||
|
from
|
||||||
|
get
|
||||||
|
got
|
||||||
|
has
|
||||||
|
had
|
||||||
|
he
|
||||||
|
have
|
||||||
|
her
|
||||||
|
here
|
||||||
|
him
|
||||||
|
himself
|
||||||
|
his
|
||||||
|
how
|
||||||
|
if
|
||||||
|
in
|
||||||
|
into
|
||||||
|
is
|
||||||
|
it
|
||||||
|
its
|
||||||
|
just
|
||||||
|
like
|
||||||
|
make
|
||||||
|
many
|
||||||
|
me
|
||||||
|
might
|
||||||
|
more
|
||||||
|
most
|
||||||
|
much
|
||||||
|
must
|
||||||
|
my
|
||||||
|
never
|
||||||
|
now
|
||||||
|
of
|
||||||
|
on
|
||||||
|
only
|
||||||
|
or
|
||||||
|
other
|
||||||
|
our
|
||||||
|
out
|
||||||
|
over
|
||||||
|
re
|
||||||
|
said
|
||||||
|
same
|
||||||
|
see
|
||||||
|
should
|
||||||
|
since
|
||||||
|
so
|
||||||
|
some
|
||||||
|
still
|
||||||
|
such
|
||||||
|
take
|
||||||
|
than
|
||||||
|
that
|
||||||
|
the
|
||||||
|
their
|
||||||
|
them
|
||||||
|
then
|
||||||
|
there
|
||||||
|
these
|
||||||
|
they
|
||||||
|
this
|
||||||
|
those
|
||||||
|
through
|
||||||
|
to
|
||||||
|
too
|
||||||
|
under
|
||||||
|
up
|
||||||
|
use
|
||||||
|
very
|
||||||
|
want
|
||||||
|
was
|
||||||
|
way
|
||||||
|
we
|
||||||
|
well
|
||||||
|
were
|
||||||
|
what
|
||||||
|
when
|
||||||
|
where
|
||||||
|
which
|
||||||
|
while
|
||||||
|
who
|
||||||
|
will
|
||||||
|
with
|
||||||
|
would
|
||||||
|
you
|
||||||
|
your
|
||||||
|
|
44724
solr/exampleSolr/conf/syn.txt
Normal file
44724
solr/exampleSolr/conf/syn.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -36,7 +36,14 @@ log4j.rootLogger=INFO, AllAppender
|
||||||
log4j.logger.edu.cornell.mannlib.vitro.webapp.controller.freemarker.BrowseController=WARN
|
log4j.logger.edu.cornell.mannlib.vitro.webapp.controller.freemarker.BrowseController=WARN
|
||||||
log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.pellet.PelletListener=WARN
|
log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.pellet.PelletListener=WARN
|
||||||
log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.RDBGraphGenerator=WARN
|
log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.RDBGraphGenerator=WARN
|
||||||
|
#log4j.logger.edu.cornell.mannlib.vitro.webapp.search.solr.IndividualToSolrDocument=DEBUG
|
||||||
|
#log4j.logger.edu.cornell.mannlib.vitro.webapp.search.solr.CalculateParameters=DEBUG
|
||||||
|
#log4j.logger.edu.cornell.mannlib.vitro.webapp.search.solr.ContextNodeFields=DEBUG
|
||||||
|
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder=INFO
|
||||||
|
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder=DEBUG
|
||||||
|
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.indexing.IndexWorkerThread=INFO
|
||||||
|
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.solr.SolrIndexer=INFO
|
||||||
|
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.controller.SolrPagedSearchController=DEBUG
|
||||||
# suppress odd warnings from libraries
|
# suppress odd warnings from libraries
|
||||||
log4j.logger.org.openjena.riot=FATAL
|
log4j.logger.org.openjena.riot=FATAL
|
||||||
log4j.logger.org.directwebremoting=FATAL
|
log4j.logger.org.directwebremoting=FATAL
|
|
@ -24,6 +24,9 @@ public class DisplayVocabulary {
|
||||||
/* Individuals */
|
/* Individuals */
|
||||||
public static final String PRIMARY_LUCENE_INDEX_URI = NS + "PrimaryLuceneIndex";
|
public static final String PRIMARY_LUCENE_INDEX_URI = NS + "PrimaryLuceneIndex";
|
||||||
|
|
||||||
|
//bk392 for extracting properties beyond context nodes.
|
||||||
|
public static final String CONTEXT_NODES_URI = NS + "QueryForContextNodes";
|
||||||
|
|
||||||
/* Page types */
|
/* Page types */
|
||||||
public static final String PAGE_TYPE = NS + "Page";
|
public static final String PAGE_TYPE = NS + "Page";
|
||||||
public static final String HOME_PAGE_TYPE = NS + "HomePage";
|
public static final String HOME_PAGE_TYPE = NS + "HomePage";
|
||||||
|
@ -38,6 +41,8 @@ public class DisplayVocabulary {
|
||||||
public static final DatatypeProperty URL_MAPPING = m_model.createDatatypeProperty(NS + "urlMapping");
|
public static final DatatypeProperty URL_MAPPING = m_model.createDatatypeProperty(NS + "urlMapping");
|
||||||
public static final String TITLE = NS + "title";
|
public static final String TITLE = NS + "title";
|
||||||
public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate");
|
public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate");
|
||||||
|
//bk392 for extracting properties beyond context nodes.
|
||||||
|
public static final DatatypeProperty QUERY_FOR_EDUCATIONAL_TRAINING = m_model.createDatatypeProperty(NS + "queryForEducationalTraining");
|
||||||
|
|
||||||
/* URIs for storing menu.n3 */
|
/* URIs for storing menu.n3 */
|
||||||
public static final String MENU_TEXT_RES = NS + "MenuText";
|
public static final String MENU_TEXT_RES = NS + "MenuText";
|
||||||
|
|
|
@ -780,7 +780,7 @@ public class IndividualJena extends IndividualImpl implements Individual {
|
||||||
Statement stmt = stmtIt.nextStatement();
|
Statement stmt = stmtIt.nextStatement();
|
||||||
if (stmt.getObject().isURIResource()) {
|
if (stmt.getObject().isURIResource()) {
|
||||||
String typeURI = ((Resource)stmt.getObject()).getURI();
|
String typeURI = ((Resource)stmt.getObject()).getURI();
|
||||||
if (pfs.isClassProhibited(typeURI)) {
|
if (pfs.isClassProhibitedFromSearch(typeURI)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1069,7 +1069,7 @@ public class IndividualSDB extends IndividualImpl implements Individual {
|
||||||
|
|
||||||
while(itr.hasNext()) {
|
while(itr.hasNext()) {
|
||||||
String typeURI = itr.next().getURI();
|
String typeURI = itr.next().getURI();
|
||||||
if (pfs.isClassProhibited(typeURI)) {
|
if (pfs.isClassProhibitedFromSearch(typeURI)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -324,7 +324,7 @@ public class VClassGroupDaoJena extends JenaBaseDao implements VClassGroupDao {
|
||||||
for (VClassGroup group : groups) {
|
for (VClassGroup group : groups) {
|
||||||
List<VClass> classList = new ArrayList<VClass>();
|
List<VClass> classList = new ArrayList<VClass>();
|
||||||
for (VClass vclass : group.getVitroClassList()) {
|
for (VClass vclass : group.getVitroClassList()) {
|
||||||
if (!pfs.isClassProhibited(vclass.getURI())) {
|
if (!pfs.isClassProhibitedFromSearch(vclass.getURI())) {
|
||||||
classList.add(vclass);
|
classList.add(vclass);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,66 @@
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.search;
|
||||||
|
|
||||||
|
public class VitroTermNames {
|
||||||
|
/** Id of entity, vclass or tab */
|
||||||
|
public static String URI = "URI";
|
||||||
|
/** lucene document id */
|
||||||
|
public static String DOCID = "DocId";
|
||||||
|
/** java class of the object that the Doc represents. */
|
||||||
|
public static String JCLASS = "JCLASS";
|
||||||
|
/** rdf:type */
|
||||||
|
public static String RDFTYPE = "type";
|
||||||
|
/** URI of the classgroup */
|
||||||
|
public static String CLASSGROUP_URI = "classgroup";
|
||||||
|
/** Modtime from db */
|
||||||
|
public static String MODTIME = "modTime";
|
||||||
|
|
||||||
|
/** time of index in msec since epoch */
|
||||||
|
public static String INDEXEDTIME= "indexedTime";
|
||||||
|
/** timekey of entity in yyyymmddhhmm */
|
||||||
|
public static String TIMEKEY="TIMEKEY";
|
||||||
|
/** time of sunset/end of entity in yyyymmddhhmm */
|
||||||
|
public static String SUNSET="SUNSET";
|
||||||
|
/** time of sunrise/start of entity in yyyymmddhhmm */
|
||||||
|
public static String SUNRISE="SUNRISE";
|
||||||
|
/** entity's moniker */
|
||||||
|
public static String MONIKER="moniker";
|
||||||
|
/** text for 'full text' search, this is stemmed */
|
||||||
|
public static String ALLTEXT = "ALLTEXT";
|
||||||
|
/** text for 'full text' search, this is unstemmed for
|
||||||
|
* use with wildcards and prefix queries */
|
||||||
|
public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
|
||||||
|
/** field name for storing targeted information **/
|
||||||
|
public static final String targetInfo = "targetInfo";
|
||||||
|
/** keywords */
|
||||||
|
public static final String KEYWORDS = "KEYWORDS";
|
||||||
|
/** Does the individual have a thumbnail image? 1=yes 0=no */
|
||||||
|
public static final String THUMBNAIL = "THUMBNAIL";
|
||||||
|
/** Should individual be included in full text search results? 1=yes 0=no */
|
||||||
|
public static final String PROHIBITED_FROM_TEXT_RESULTS = "PROHIBITED_FROM_TEXT_RESULTS";
|
||||||
|
/** class names of an individual in human readable form, lowercase variant */
|
||||||
|
public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase";
|
||||||
|
/** class names in human readable form of an individual*/
|
||||||
|
public static final String CLASSLOCALNAME = "classLocalName";
|
||||||
|
|
||||||
|
// Fields derived from rdfs:label
|
||||||
|
/** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/
|
||||||
|
public static String NAME_RAW = "nameRaw"; // was NAMERAW
|
||||||
|
|
||||||
|
/** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/
|
||||||
|
public static String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE
|
||||||
|
|
||||||
|
/** rdfs:label lowercased, tokenized, stop words, no stemming **/
|
||||||
|
public static String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED
|
||||||
|
|
||||||
|
/** rdfs:label lowercased, tokenized, stop words, stemmed **/
|
||||||
|
public static String NAME_STEMMED = "nameStemmed"; // was NAME
|
||||||
|
|
||||||
|
/** field for beta values of all documents **/
|
||||||
|
public static final String BETA = "BETA";
|
||||||
|
public static final String PHI = "PHI";
|
||||||
|
public static final String ADJACENT_NODES = "ADJACENT_NODES";
|
||||||
|
|
||||||
|
/** adding phonetic field **/
|
||||||
|
public static final String ALLTEXT_PHONETIC = "ALLTEXT_PHONETIC";
|
||||||
|
public static final String NAME_PHONETIC = "NAME_PHONETIC";
|
||||||
|
}
|
|
@ -0,0 +1,5 @@
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.search.beans;
|
||||||
|
|
||||||
|
public interface ClassProhibitedFromSearch {
|
||||||
|
public boolean isClassProhibitedFromSearch(String classUri);
|
||||||
|
}
|
|
@ -1,67 +1,5 @@
|
||||||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
|
||||||
|
|
||||||
package edu.cornell.mannlib.vitro.webapp.search.beans;
|
package edu.cornell.mannlib.vitro.webapp.search.beans;
|
||||||
|
|
||||||
import javax.servlet.ServletContext;
|
public interface IndividualProhibitedFromSearch {
|
||||||
|
public boolean isIndividualProhibited(String uri);
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import com.hp.hpl.jena.ontology.OntModel;
|
|
||||||
import com.hp.hpl.jena.query.Query;
|
|
||||||
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
|
||||||
import com.hp.hpl.jena.query.QueryFactory;
|
|
||||||
import com.hp.hpl.jena.shared.Lock;
|
|
||||||
import com.hp.hpl.jena.vocabulary.OWL;
|
|
||||||
import com.hp.hpl.jena.vocabulary.RDF;
|
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
|
||||||
|
|
||||||
public class IndividualProhibitedFromSearch {
|
|
||||||
|
|
||||||
protected OntModel fullModel;
|
|
||||||
|
|
||||||
protected static Log log = LogFactory.getLog(IndividualProhibitedFromSearch.class);
|
|
||||||
|
|
||||||
|
|
||||||
public IndividualProhibitedFromSearch( ServletContext context ){
|
|
||||||
this.fullModel = ModelContext.getUnionOntModelSelector(context).getFullModel();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isIndividualProhibited(String uri){
|
|
||||||
if( uri == null || uri.isEmpty() )
|
|
||||||
return true;
|
|
||||||
|
|
||||||
boolean prohibited = false;
|
|
||||||
try {
|
|
||||||
fullModel.getLock().enterCriticalSection(Lock.READ);
|
|
||||||
Query query = makeAskQueryForUri( uri );
|
|
||||||
prohibited = QueryExecutionFactory.create( query, fullModel).execAsk();
|
|
||||||
} finally {
|
|
||||||
fullModel.getLock().leaveCriticalSection();
|
|
||||||
}
|
|
||||||
if( prohibited )
|
|
||||||
log.debug("prohibited " + uri);
|
|
||||||
|
|
||||||
return prohibited;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Query makeAskQueryForUri( String uri ){
|
|
||||||
String queryString =
|
|
||||||
"PREFIX fn: <http://www.w3.org/2005/xpath-functions#> \n" +
|
|
||||||
"ASK { \n" +
|
|
||||||
" <"+uri+"> <" + RDF.type.getURI() + "> ?type . \n" +
|
|
||||||
" FILTER ( \n" +
|
|
||||||
" ( fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "\" ) \n" +
|
|
||||||
" && \n"+
|
|
||||||
" ! fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "Flag\" ) ) || \n" +
|
|
||||||
" fn:starts-with( str(?type), \"" + VitroVocabulary.PUBLIC + "\" ) || \n" +
|
|
||||||
" str(?type) = \"" + OWL.ObjectProperty.getURI() + "\" || \n" +
|
|
||||||
" str(?type) = \"" + OWL.DatatypeProperty.getURI() + "\" || \n" +
|
|
||||||
" str(?type) = \"" + OWL.AnnotationProperty.getURI() + "\" \n" +
|
|
||||||
" )\n" +
|
|
||||||
"}" ;
|
|
||||||
return QueryFactory.create( queryString );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,70 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.search.beans;
|
||||||
|
|
||||||
|
import javax.servlet.ServletContext;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
import com.hp.hpl.jena.ontology.OntModel;
|
||||||
|
import com.hp.hpl.jena.query.Query;
|
||||||
|
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
||||||
|
import com.hp.hpl.jena.query.QueryFactory;
|
||||||
|
import com.hp.hpl.jena.shared.Lock;
|
||||||
|
import com.hp.hpl.jena.vocabulary.OWL;
|
||||||
|
import com.hp.hpl.jena.vocabulary.RDF;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||||
|
|
||||||
|
public class IndividualProhibitedFromSearchImpl implements IndividualProhibitedFromSearch {
|
||||||
|
|
||||||
|
protected OntModel fullModel;
|
||||||
|
|
||||||
|
protected static Log log = LogFactory.getLog(IndividualProhibitedFromSearchImpl.class);
|
||||||
|
|
||||||
|
public IndividualProhibitedFromSearchImpl( ServletContext context ){
|
||||||
|
this.fullModel = ModelContext.getUnionOntModelSelector(context).getFullModel();
|
||||||
|
}
|
||||||
|
|
||||||
|
public IndividualProhibitedFromSearchImpl( OntModel fullModel ){
|
||||||
|
this.fullModel = fullModel;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isIndividualProhibited(String uri){
|
||||||
|
if( uri == null || uri.isEmpty() )
|
||||||
|
return true;
|
||||||
|
|
||||||
|
boolean prohibited = false;
|
||||||
|
try {
|
||||||
|
fullModel.getLock().enterCriticalSection(Lock.READ);
|
||||||
|
Query query = makeAskQueryForUri( uri );
|
||||||
|
prohibited = QueryExecutionFactory.create( query, fullModel).execAsk();
|
||||||
|
} finally {
|
||||||
|
fullModel.getLock().leaveCriticalSection();
|
||||||
|
}
|
||||||
|
if( prohibited )
|
||||||
|
log.debug("prohibited " + uri);
|
||||||
|
|
||||||
|
return prohibited;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Query makeAskQueryForUri( String uri ){
|
||||||
|
String queryString =
|
||||||
|
"PREFIX fn: <http://www.w3.org/2005/xpath-functions#> \n" +
|
||||||
|
"ASK { \n" +
|
||||||
|
" <"+uri+"> <" + RDF.type.getURI() + "> ?type . \n" +
|
||||||
|
" FILTER ( \n" +
|
||||||
|
" ( fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "\" ) \n" +
|
||||||
|
" && \n"+
|
||||||
|
" ! fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "Flag\" ) ) || \n" +
|
||||||
|
" fn:starts-with( str(?type), \"" + VitroVocabulary.PUBLIC + "\" ) || \n" +
|
||||||
|
" str(?type) = \"" + OWL.ObjectProperty.getURI() + "\" || \n" +
|
||||||
|
" str(?type) = \"" + OWL.DatatypeProperty.getURI() + "\" || \n" +
|
||||||
|
" str(?type) = \"" + OWL.AnnotationProperty.getURI() + "\" \n" +
|
||||||
|
" )\n" +
|
||||||
|
"}" ;
|
||||||
|
return QueryFactory.create( queryString );
|
||||||
|
}
|
||||||
|
}
|
|
@ -25,7 +25,7 @@ import com.hp.hpl.jena.shared.Lock;
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
|
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
|
||||||
|
|
||||||
public class ProhibitedFromSearch {
|
public class ProhibitedFromSearch implements ClassProhibitedFromSearch{
|
||||||
List<String> prohibitedClasses;
|
List<String> prohibitedClasses;
|
||||||
String ProhibitedFromSearchURI;
|
String ProhibitedFromSearchURI;
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@ public class ProhibitedFromSearch {
|
||||||
model.register(new ProhibitedFromSearchChangeListener( this ));
|
model.register(new ProhibitedFromSearchChangeListener( this ));
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized boolean isClassProhibited(String classURI){
|
public synchronized boolean isClassProhibitedFromSearch(String classURI){
|
||||||
if( classURI != null ){
|
if( classURI != null ){
|
||||||
boolean p = prohibitedClasses.contains(classURI);
|
boolean p = prohibitedClasses.contains(classURI);
|
||||||
log.debug( classURI + " is " + (p?"prohibited":"not prohibited"));
|
log.debug( classURI + " is " + (p?"prohibited":"not prohibited"));
|
||||||
|
|
|
@ -49,6 +49,7 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.VitroHighlighter;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
|
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
|
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
|
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
|
||||||
import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel;
|
import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel;
|
||||||
|
@ -178,6 +179,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
|
||||||
SolrServer solr = SolrSetup.getSolrServer(getServletContext());
|
SolrServer solr = SolrSetup.getSolrServer(getServletContext());
|
||||||
QueryResponse response = null;
|
QueryResponse response = null;
|
||||||
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
response = solr.query(query);
|
response = solr.query(query);
|
||||||
|
|
||||||
|
@ -349,11 +351,11 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
|
||||||
for(int i=0; i<hitCount && n > grpsFound ;i++){
|
for(int i=0; i<hitCount && n > grpsFound ;i++){
|
||||||
try{
|
try{
|
||||||
SolrDocument doc = docs.get(i);
|
SolrDocument doc = docs.get(i);
|
||||||
Collection<Object> grps = doc.getFieldValues(VitroLuceneTermNames.CLASSGROUP_URI);
|
Collection<Object> grps = doc.getFieldValues(VitroTermNames.CLASSGROUP_URI);
|
||||||
if (grps != null) {
|
if (grps != null) {
|
||||||
for (Object o : grps) {
|
for (Object o : grps) {
|
||||||
String groupUri = o.toString();
|
String groupUri = o.toString();
|
||||||
if( groupUri != null && ! classGroupsInHits.contains(groupUri)){
|
if( groupUri != null && !classGroupsInHits.contains(groupUri)){
|
||||||
classGroupsInHits.add(groupUri);
|
classGroupsInHits.add(groupUri);
|
||||||
grpsFound++;
|
grpsFound++;
|
||||||
if( grpsFound >= n )
|
if( grpsFound >= n )
|
||||||
|
@ -364,6 +366,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
|
||||||
} catch(Exception e) {
|
} catch(Exception e) {
|
||||||
log.error("problem getting VClassGroups from search hits "
|
log.error("problem getting VClassGroups from search hits "
|
||||||
+ e.getMessage() );
|
+ e.getMessage() );
|
||||||
|
e.printStackTrace();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,8 +9,11 @@ import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Stack;
|
||||||
|
import java.util.Queue;
|
||||||
|
|
||||||
import javax.servlet.ServletContext;
|
import javax.servlet.ServletContext;
|
||||||
|
import org.apache.solr.client.solrj.SolrServer;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -21,6 +24,8 @@ import edu.cornell.mannlib.vitro.webapp.dao.VClassDao;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
|
import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.solr.CalculateParameters;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The IndexBuilder is used to rebuild or update a search index.
|
* The IndexBuilder is used to rebuild or update a search index.
|
||||||
|
@ -282,8 +287,29 @@ public class IndexBuilder extends Thread {
|
||||||
* @throws AbortIndexing
|
* @throws AbortIndexing
|
||||||
*/
|
*/
|
||||||
private void indexForSource(Iterator<Individual> individuals , boolean newDocs) throws AbortIndexing{
|
private void indexForSource(Iterator<Individual> individuals , boolean newDocs) throws AbortIndexing{
|
||||||
long starttime = System.currentTimeMillis();
|
|
||||||
long count = 0;
|
|
||||||
|
// long starttime = System.currentTimeMillis();
|
||||||
|
int count = 0;
|
||||||
|
int numOfThreads = 10;
|
||||||
|
|
||||||
|
|
||||||
|
List<IndexWorkerThread> workers = new ArrayList<IndexWorkerThread>();
|
||||||
|
boolean distributing = true;
|
||||||
|
|
||||||
|
for(int i = 0; i< numOfThreads ;i++){
|
||||||
|
workers.add(new IndexWorkerThread(indexer,i,distributing)); // made a pool of workers
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Indexing worker pool ready for indexing.");
|
||||||
|
|
||||||
|
// starting worker threads
|
||||||
|
|
||||||
|
for(int i =0; i < numOfThreads; i++){
|
||||||
|
workers.get(i).start();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
while(individuals.hasNext()){
|
while(individuals.hasNext()){
|
||||||
if( stopRequested )
|
if( stopRequested )
|
||||||
throw new AbortIndexing();
|
throw new AbortIndexing();
|
||||||
|
@ -291,7 +317,11 @@ public class IndexBuilder extends Thread {
|
||||||
Individual ind = null;
|
Individual ind = null;
|
||||||
try{
|
try{
|
||||||
ind = individuals.next();
|
ind = individuals.next();
|
||||||
indexer.index(ind, newDocs);
|
|
||||||
|
//indexer.index(ind);
|
||||||
|
|
||||||
|
workers.get(count%numOfThreads).addToQueue(ind); // adding individual to worker queue.
|
||||||
|
|
||||||
}catch(Throwable ex){
|
}catch(Throwable ex){
|
||||||
if( stopRequested || log == null){//log might be null if system is shutting down.
|
if( stopRequested || log == null){//log might be null if system is shutting down.
|
||||||
throw new AbortIndexing();
|
throw new AbortIndexing();
|
||||||
|
@ -300,21 +330,34 @@ public class IndexBuilder extends Thread {
|
||||||
log.warn("Error indexing individual " + uri + " " + ex.getMessage());
|
log.warn("Error indexing individual " + uri + " " + ex.getMessage());
|
||||||
}
|
}
|
||||||
count++;
|
count++;
|
||||||
if( log.isDebugEnabled() ){
|
/* if( log.isDebugEnabled() ){
|
||||||
if( (count % 100 ) == 0 && count > 0 ){
|
if( (count % 100 ) == 0 && count > 0 ){
|
||||||
long dt = (System.currentTimeMillis() - starttime);
|
long dt = (System.currentTimeMillis() - starttime);
|
||||||
log.debug("individuals indexed: " + count + " in " + dt + " msec " +
|
log.debug("individuals indexed: " + count + " in " + dt + " msec " +
|
||||||
" time pre individual = " + (dt / count) + " msec" );
|
" time pre individual = " + (dt / count) + " msec" );
|
||||||
}
|
}
|
||||||
|
} */
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int i =0 ; i < numOfThreads; i ++){
|
||||||
|
workers.get(i).setDistributing(false);
|
||||||
|
}
|
||||||
|
for(int i =0; i < numOfThreads; i++){
|
||||||
|
try{
|
||||||
|
workers.get(i).join();
|
||||||
|
}catch(InterruptedException e){
|
||||||
|
log.error(e,e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log.info(
|
/* log.info(
|
||||||
"individuals indexed: " + count + " in " + (System.currentTimeMillis() - starttime) + " msec" +
|
"individuals indexed: " + count + " in " + (System.currentTimeMillis() - starttime) + " msec" +
|
||||||
(count!=0?(" time per individual = " + (System.currentTimeMillis() - starttime)/ count + " msec"):"")
|
(count!=0?(" time per individual = " + (System.currentTimeMillis() - starttime)/ count + " msec"):"")
|
||||||
);
|
);*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For a list of individuals, this builds a list of dependent resources and returns it.
|
* For a list of individuals, this builds a list of dependent resources and returns it.
|
||||||
*/
|
*/
|
||||||
|
@ -388,4 +431,6 @@ public class IndexBuilder extends Thread {
|
||||||
private class AbortIndexing extends Exception {
|
private class AbortIndexing extends Exception {
|
||||||
// Just a vanilla exception
|
// Just a vanilla exception
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,97 @@
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.search.indexing;
|
||||||
|
|
||||||
|
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.Queue;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.solr.IndividualToSolrDocument;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class IndexWorkerThread extends Thread{
|
||||||
|
|
||||||
|
protected IndividualToSolrDocument individualToSolrDoc;
|
||||||
|
private IndexerIface indexer = null;
|
||||||
|
private Log log = LogFactory.getLog(IndexWorkerThread.class);
|
||||||
|
private static long count=0;
|
||||||
|
private Queue<Individual> indQueue = new LinkedList<Individual>();
|
||||||
|
private int threadNum;
|
||||||
|
private static long starttime = 0;
|
||||||
|
private boolean distributing;
|
||||||
|
|
||||||
|
public IndexWorkerThread(IndexerIface indexer, int threadNum,boolean distributing){
|
||||||
|
this.indexer = indexer;
|
||||||
|
this.threadNum = threadNum;
|
||||||
|
this.distributing = distributing;
|
||||||
|
synchronized(this){
|
||||||
|
if(starttime == 0)
|
||||||
|
starttime = System.currentTimeMillis();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addToQueue(Individual ind){
|
||||||
|
synchronized(indQueue){
|
||||||
|
indQueue.offer(ind);
|
||||||
|
indQueue.notify();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isQueueEmpty(){
|
||||||
|
return indQueue.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDistributing(boolean distributing){
|
||||||
|
this.distributing = distributing;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run(){
|
||||||
|
|
||||||
|
while(this.distributing){
|
||||||
|
synchronized(indQueue){
|
||||||
|
try{
|
||||||
|
while(indQueue.isEmpty() && this.distributing){
|
||||||
|
try{
|
||||||
|
log.debug("Worker number " + threadNum + " waiting on some work to be alloted.");
|
||||||
|
indQueue.wait(1000);
|
||||||
|
}catch(InterruptedException ie){
|
||||||
|
log.error(ie,ie);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Thread.sleep(50); //wait a bit to let a bit more work to come into the queue
|
||||||
|
log.debug("work found for Woker number " + threadNum);
|
||||||
|
addDocsToIndex();
|
||||||
|
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
log.debug("Worker number " + threadNum + " woken up",e);
|
||||||
|
}
|
||||||
|
catch(Throwable e){
|
||||||
|
log.error(e,e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.info("Worker number " + threadNum + " exiting.");
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void addDocsToIndex() throws IndexingException{
|
||||||
|
|
||||||
|
while(!indQueue.isEmpty()){
|
||||||
|
indexer.index(indQueue.poll());
|
||||||
|
synchronized(this){
|
||||||
|
count++;
|
||||||
|
if( log.isInfoEnabled() ){
|
||||||
|
if( (count % 100 ) == 0 && count > 0 ){
|
||||||
|
long dt = (System.currentTimeMillis() - starttime);
|
||||||
|
log.info("individuals indexed: " + count + " in " + dt + " msec " +
|
||||||
|
" time per individual = " + (dt / count) + " msec" );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -38,7 +38,7 @@ public interface IndexerIface {
|
||||||
* @param newDoc - if true, just insert doc, if false attempt to update.
|
* @param newDoc - if true, just insert doc, if false attempt to update.
|
||||||
* @throws IndexingException
|
* @throws IndexingException
|
||||||
*/
|
*/
|
||||||
public void index(Individual ind, boolean newDoc)throws IndexingException;
|
public void index(Individual ind)throws IndexingException;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
|
|
||||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
@ -21,6 +23,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||||
|
@ -31,6 +34,7 @@ import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||||
* be as full as possible.
|
* be as full as possible.
|
||||||
*/
|
*/
|
||||||
public class Entity2LuceneDoc implements Obj2DocIface{
|
public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
|
|
||||||
/** These are the terms for the lucene index */
|
/** These are the terms for the lucene index */
|
||||||
public static class VitroLuceneTermNames{
|
public static class VitroLuceneTermNames{
|
||||||
/** Id of entity, vclass or tab */
|
/** Id of entity, vclass or tab */
|
||||||
|
@ -61,6 +65,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
/** text for 'full text' search, this is unstemmed for
|
/** text for 'full text' search, this is unstemmed for
|
||||||
* use with wildcards and prefix queries */
|
* use with wildcards and prefix queries */
|
||||||
public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
|
public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
|
||||||
|
/** class name for storing context nodes **/
|
||||||
|
public static final String CONTEXTNODE = "contextNode";
|
||||||
/** keywords */
|
/** keywords */
|
||||||
public static final String KEYWORDS = "KEYWORDS";
|
public static final String KEYWORDS = "KEYWORDS";
|
||||||
/** Does the individual have a thumbnail image? 1=yes 0=no */
|
/** Does the individual have a thumbnail image? 1=yes 0=no */
|
||||||
|
@ -108,6 +114,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
|
|
||||||
private IndividualProhibitedFromSearch individualProhibited;
|
private IndividualProhibitedFromSearch individualProhibited;
|
||||||
|
|
||||||
|
private static HashMap<String, String> IndividualURIToObjectProperties = new HashMap<String, String>();
|
||||||
|
|
||||||
|
private static HashSet<String> objectProperties = new HashSet<String>();
|
||||||
|
|
||||||
public Entity2LuceneDoc(
|
public Entity2LuceneDoc(
|
||||||
ProhibitedFromSearch classesProhibitedFromSearch,
|
ProhibitedFromSearch classesProhibitedFromSearch,
|
||||||
IndividualProhibitedFromSearch individualProhibited){
|
IndividualProhibitedFromSearch individualProhibited){
|
||||||
|
@ -128,7 +138,6 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
String classPublicNames = "";
|
String classPublicNames = "";
|
||||||
|
|
||||||
|
|
||||||
//DocId
|
//DocId
|
||||||
String id = ent.getURI();
|
String id = ent.getURI();
|
||||||
log.debug("translating " + id);
|
log.debug("translating " + id);
|
||||||
|
@ -162,7 +171,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
log.debug("not indexing " + id + " because of type " + clz.getURI());
|
log.debug("not indexing " + id + " because of type " + clz.getURI());
|
||||||
return null;
|
return null;
|
||||||
}else{
|
}else{
|
||||||
if( !prohibited && classesProhibitedFromSearch.isClassProhibited(clz.getURI()) )
|
if( !prohibited && classesProhibitedFromSearch.isClassProhibitedFromSearch(clz.getURI()) )
|
||||||
prohibited = true;
|
prohibited = true;
|
||||||
|
|
||||||
if( clz.getSearchBoost() != null )
|
if( clz.getSearchBoost() != null )
|
||||||
|
@ -197,9 +206,11 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
doc.add( new Field(term.DOCID, entClassName + id,
|
doc.add( new Field(term.DOCID, entClassName + id,
|
||||||
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||||
|
|
||||||
|
|
||||||
//vitro Id
|
//vitro Id
|
||||||
doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||||
|
|
||||||
|
|
||||||
//java class
|
//java class
|
||||||
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||||
|
|
||||||
|
@ -212,7 +223,6 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
log.debug("Using local name for individual with rdfs:label " + ent.getURI());
|
log.debug("Using local name for individual with rdfs:label " + ent.getURI());
|
||||||
value = ent.getLocalName();
|
value = ent.getLocalName();
|
||||||
}
|
}
|
||||||
|
|
||||||
Field nameRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
|
Field nameRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||||
nameRaw.setBoost(NAME_BOOST);
|
nameRaw.setBoost(NAME_BOOST);
|
||||||
doc.add(nameRaw);
|
doc.add(nameRaw);
|
||||||
|
@ -230,6 +240,20 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
nameStemmed.setBoost(NAME_BOOST);
|
nameStemmed.setBoost(NAME_BOOST);
|
||||||
doc.add(nameStemmed);
|
doc.add(nameStemmed);
|
||||||
|
|
||||||
|
String contextNodePropertyValues;
|
||||||
|
|
||||||
|
// if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
|
||||||
|
/*contextNodePropertyValues = searchQueryHandler.getPropertiesAssociatedWithEducationalTraining(ent.getURI());
|
||||||
|
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRole(ent.getURI());
|
||||||
|
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithPosition(ent.getURI());
|
||||||
|
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRelationship(ent.getURI());
|
||||||
|
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithAwardReceipt(ent.getURI());
|
||||||
|
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithInformationResource(ent.getURI()); */
|
||||||
|
|
||||||
|
// }
|
||||||
|
|
||||||
|
/* Field contextNodeInformation = new Field(term.CONTEXTNODE, contextNodePropertyValues, Field.Store.YES, Field.Index.ANALYZED );
|
||||||
|
doc.add(contextNodeInformation);*/
|
||||||
|
|
||||||
//Moniker
|
//Moniker
|
||||||
|
|
||||||
|
@ -279,6 +303,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
|
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
|
||||||
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||||
|
|
||||||
|
|
||||||
if( ! prohibited ){
|
if( ! prohibited ){
|
||||||
//ALLTEXT, all of the 'full text'
|
//ALLTEXT, all of the 'full text'
|
||||||
String t=null;
|
String t=null;
|
||||||
|
@ -310,6 +335,12 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
continue;
|
continue;
|
||||||
try {
|
try {
|
||||||
value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t );
|
value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t );
|
||||||
|
|
||||||
|
if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
|
||||||
|
//IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) );
|
||||||
|
objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ));
|
||||||
|
}
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.debug("could not index name of related object: " + e.getMessage());
|
log.debug("could not index name of related object: " + e.getMessage());
|
||||||
}
|
}
|
||||||
|
@ -321,6 +352,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
|
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// log.info("\n IndividualURItoObjectProperties " + IndividualURIToObjectProperties.toString() + " \n\n");
|
||||||
|
log.info(" \n Object Properties " + objectProperties.toString() + "\n\n");
|
||||||
|
|
||||||
return doc;
|
return doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -474,4 +474,10 @@ public class LuceneIndexer implements IndexerIface {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void index(Individual ind) throws IndexingException {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CONTEXTNODE;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_UNSTEMMED;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_UNSTEMMED;
|
||||||
|
@ -38,7 +39,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||||
|
@ -111,10 +112,15 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
getAnalyzer());
|
getAnalyzer());
|
||||||
context.setAttribute(ANALYZER, getAnalyzer());
|
context.setAttribute(ANALYZER, getAnalyzer());
|
||||||
|
|
||||||
|
//bk392 adding another argument to Entity2LuceneDoc
|
||||||
|
// that takes care of sparql queries for context nodes.
|
||||||
|
|
||||||
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
||||||
Entity2LuceneDoc translator = new Entity2LuceneDoc(
|
Entity2LuceneDoc translator = new Entity2LuceneDoc(
|
||||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||||
new IndividualProhibitedFromSearch(context) );
|
new IndividualProhibitedFromSearchImpl(context)
|
||||||
|
|
||||||
|
);
|
||||||
indexer.addObj2Doc(translator);
|
indexer.addObj2Doc(translator);
|
||||||
|
|
||||||
context.setAttribute(LuceneIndexer.class.getName(), indexer);
|
context.setAttribute(LuceneIndexer.class.getName(), indexer);
|
||||||
|
@ -250,9 +256,11 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
analyzer.addAnalyzer(NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
|
analyzer.addAnalyzer(NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
|
||||||
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
|
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
|
||||||
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
|
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
|
||||||
|
analyzer.addAnalyzer(CONTEXTNODE, new StandardAnalyzer(Version.LUCENE_29));
|
||||||
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
|
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
|
||||||
analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer());
|
analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer());
|
||||||
|
|
||||||
|
|
||||||
return analyzer;
|
return analyzer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||||
|
|
||||||
|
@ -92,7 +92,8 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
|
||||||
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
||||||
Entity2LuceneDoc translator = new Entity2LuceneDoc(
|
Entity2LuceneDoc translator = new Entity2LuceneDoc(
|
||||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||||
new IndividualProhibitedFromSearch(context) );
|
new IndividualProhibitedFromSearchImpl(context)
|
||||||
|
);
|
||||||
indexer.addObj2Doc(translator);
|
indexer.addObj2Doc(translator);
|
||||||
|
|
||||||
indexer.setLuceneIndexFactory(lif);
|
indexer.setLuceneIndexFactory(lif);
|
||||||
|
|
|
@ -0,0 +1,347 @@
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Hashtable;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.StringTokenizer;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.solr.common.SolrInputField;
|
||||||
|
|
||||||
|
import com.hp.hpl.jena.query.Dataset;
|
||||||
|
import com.hp.hpl.jena.query.Query;
|
||||||
|
import com.hp.hpl.jena.query.QueryExecution;
|
||||||
|
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
||||||
|
import com.hp.hpl.jena.query.QueryFactory;
|
||||||
|
import com.hp.hpl.jena.query.QuerySolution;
|
||||||
|
import com.hp.hpl.jena.query.QuerySolutionMap;
|
||||||
|
import com.hp.hpl.jena.query.ResultSet;
|
||||||
|
import com.hp.hpl.jena.query.Syntax;
|
||||||
|
import com.hp.hpl.jena.rdf.model.Model;
|
||||||
|
import com.hp.hpl.jena.rdf.model.Property;
|
||||||
|
import com.hp.hpl.jena.rdf.model.RDFNode;
|
||||||
|
import com.hp.hpl.jena.rdf.model.Resource;
|
||||||
|
import com.hp.hpl.jena.rdf.model.ResourceFactory;
|
||||||
|
import com.hp.hpl.jena.rdf.model.StmtIterator;
|
||||||
|
import com.hp.hpl.jena.shared.Lock;
|
||||||
|
import com.hp.hpl.jena.ontology.OntModel;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
|
||||||
|
|
||||||
|
|
||||||
|
public class CalculateParameters implements DocumentModifier {
|
||||||
|
|
||||||
|
private Dataset dataset;
|
||||||
|
public static int totalInd=1;
|
||||||
|
protected Map<String,Float> betaMap = new Hashtable<String,Float>();
|
||||||
|
private static final String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||||
|
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||||
|
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||||
|
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||||
|
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||||
|
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||||
|
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||||
|
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||||
|
|
||||||
|
private static final String betaQuery = prefix + " SELECT count(distinct ?inLinks) " +
|
||||||
|
" WHERE { " +
|
||||||
|
" ?uri rdf:type owl:Thing . " +
|
||||||
|
" ?inLinks ?prop ?uri . " +
|
||||||
|
" } ";
|
||||||
|
|
||||||
|
private static final String totalCountQuery = prefix + " SELECT count(distinct ?ind) " +
|
||||||
|
" WHERE { " +
|
||||||
|
" ?ind rdf:type owl:Thing . " +
|
||||||
|
" } ";
|
||||||
|
|
||||||
|
private static Log log = LogFactory.getLog(CalculateParameters.class);
|
||||||
|
|
||||||
|
private static final String[] fieldsToAddBetaTo = {
|
||||||
|
VitroTermNames.NAME_RAW,
|
||||||
|
VitroTermNames.NAME_LOWERCASE,
|
||||||
|
VitroTermNames.NAME_UNSTEMMED,
|
||||||
|
VitroTermNames.NAME_STEMMED
|
||||||
|
};
|
||||||
|
|
||||||
|
private static final String[] fieldsToMultiplyBetaBy = {
|
||||||
|
VitroTermNames.ALLTEXT,
|
||||||
|
VitroTermNames.ALLTEXTUNSTEMMED,
|
||||||
|
};
|
||||||
|
|
||||||
|
public CalculateParameters(Dataset dataset){
|
||||||
|
this.dataset =dataset;
|
||||||
|
new Thread(new TotalInd(this.dataset,totalCountQuery)).start();
|
||||||
|
}
|
||||||
|
|
||||||
|
public CalculateParameters(){
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
public float calculateBeta(String uri){
|
||||||
|
float beta=0;
|
||||||
|
int Conn=0;
|
||||||
|
Query query;
|
||||||
|
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||||
|
QuerySolution soln = null;
|
||||||
|
Resource uriResource = ResourceFactory.createResource(uri);
|
||||||
|
initialBinding.add("uri", uriResource);
|
||||||
|
dataset.getLock().enterCriticalSection(Lock.READ);
|
||||||
|
|
||||||
|
try{
|
||||||
|
query = QueryFactory.create(betaQuery,Syntax.syntaxARQ);
|
||||||
|
QueryExecution qexec = QueryExecutionFactory.create(query,dataset,initialBinding);
|
||||||
|
ResultSet results = qexec.execSelect();
|
||||||
|
List<String> resultVars = results.getResultVars();
|
||||||
|
if(resultVars!=null && resultVars.size()!=0){
|
||||||
|
soln = results.next();
|
||||||
|
Conn = Integer.parseInt(soln.getLiteral(resultVars.get(0)).getLexicalForm());
|
||||||
|
}
|
||||||
|
}catch(Throwable t){
|
||||||
|
log.error(t,t);
|
||||||
|
}finally{
|
||||||
|
dataset.getLock().leaveCriticalSection();
|
||||||
|
}
|
||||||
|
|
||||||
|
beta = (float)Conn/totalInd;
|
||||||
|
beta *= 100;
|
||||||
|
beta += 1;
|
||||||
|
return beta;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float calculatePhi(StringBuffer adjNodes){
|
||||||
|
|
||||||
|
StringTokenizer nodes = new StringTokenizer(adjNodes.toString()," ");
|
||||||
|
String uri=null;
|
||||||
|
int size=0;
|
||||||
|
float phi = 0.1F;
|
||||||
|
while(nodes.hasMoreTokens()){
|
||||||
|
size++;
|
||||||
|
uri = nodes.nextToken();
|
||||||
|
phi += getBeta(uri);
|
||||||
|
}
|
||||||
|
if(size>0)
|
||||||
|
phi = (float)phi/size;
|
||||||
|
else
|
||||||
|
phi = 1;
|
||||||
|
return phi;
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized Float getBeta(String uri){
|
||||||
|
|
||||||
|
float beta;
|
||||||
|
if(betaMap.containsKey(uri)){
|
||||||
|
beta = betaMap.get(uri);
|
||||||
|
}else{
|
||||||
|
beta = calculateBeta(uri); // or calculate & put in map
|
||||||
|
betaMap.put(uri, beta);
|
||||||
|
}
|
||||||
|
return beta;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String[] getAdjacentNodes(String uri){
|
||||||
|
|
||||||
|
List<String> queryList = new ArrayList<String>();
|
||||||
|
Set<String> adjacentNodes = new HashSet<String>();
|
||||||
|
Set<String> coauthorNames = new HashSet<String>();
|
||||||
|
String[] info = new String[]{"",""};
|
||||||
|
StringBuffer adjacentNodesConcat = new StringBuffer();
|
||||||
|
StringBuffer coauthorBuff = new StringBuffer();
|
||||||
|
adjacentNodesConcat.append("");
|
||||||
|
coauthorBuff.append("");
|
||||||
|
|
||||||
|
queryList.add(prefix +
|
||||||
|
" SELECT ?adjobj (str(?adjobjLabel) as ?coauthor) " +
|
||||||
|
" WHERE { " +
|
||||||
|
" ?uri rdf:type <http://xmlns.com/foaf/0.1/Person> . " +
|
||||||
|
" ?uri ?prop ?obj . " +
|
||||||
|
" ?obj rdf:type <http://vivoweb.org/ontology/core#Relationship> . " +
|
||||||
|
" ?obj ?prop2 ?obj2 . " +
|
||||||
|
" ?obj2 rdf:type <http://vivoweb.org/ontology/core#InformationResource> . " +
|
||||||
|
" ?obj2 ?prop3 ?obj3 . " +
|
||||||
|
" ?obj3 rdf:type <http://vivoweb.org/ontology/core#Relationship> . " +
|
||||||
|
" ?obj3 ?prop4 ?adjobj . " +
|
||||||
|
" ?adjobj rdfs:label ?adjobjLabel . " +
|
||||||
|
" ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Person> . " +
|
||||||
|
|
||||||
|
" FILTER (?prop !=rdf:type) . " +
|
||||||
|
" FILTER (?prop2!=rdf:type) . " +
|
||||||
|
" FILTER (?prop3!=rdf:type) . " +
|
||||||
|
" FILTER (?prop4!=rdf:type) . " +
|
||||||
|
" FILTER (?adjobj != ?uri) . " +
|
||||||
|
"}");
|
||||||
|
|
||||||
|
queryList.add(prefix +
|
||||||
|
" SELECT ?adjobj " +
|
||||||
|
" WHERE{ " +
|
||||||
|
|
||||||
|
" ?uri rdf:type foaf:Agent . " +
|
||||||
|
" ?uri ?prop ?obj . " +
|
||||||
|
" ?obj ?prop2 ?adjobj . " +
|
||||||
|
|
||||||
|
|
||||||
|
" FILTER (?prop !=rdf:type) . " +
|
||||||
|
" FILTER isURI(?obj) . " +
|
||||||
|
|
||||||
|
" FILTER (?prop2!=rdf:type) . " +
|
||||||
|
" FILTER (?adjobj != ?uri) . " +
|
||||||
|
" FILTER isURI(?adjobj) . " +
|
||||||
|
|
||||||
|
" { ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Organization> . } " +
|
||||||
|
" UNION " +
|
||||||
|
" { ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Person> . } " +
|
||||||
|
" UNION " +
|
||||||
|
" { ?adjobj rdf:type <http://vivoweb.org/ontology/core#InformationResource> . } " +
|
||||||
|
" UNION " +
|
||||||
|
" { ?adjobj rdf:type <http://vivoweb.org/ontology/core#Location> . } ." +
|
||||||
|
"}");
|
||||||
|
|
||||||
|
Query query;
|
||||||
|
|
||||||
|
QuerySolution soln;
|
||||||
|
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||||
|
Resource uriResource = ResourceFactory.createResource(uri);
|
||||||
|
|
||||||
|
initialBinding.add("uri", uriResource);
|
||||||
|
|
||||||
|
Iterator<String> queryItr = queryList.iterator();
|
||||||
|
|
||||||
|
dataset.getLock().enterCriticalSection(Lock.READ);
|
||||||
|
Resource adjacentIndividual = null;
|
||||||
|
RDFNode coauthor = null;
|
||||||
|
try{
|
||||||
|
while(queryItr.hasNext()){
|
||||||
|
/*if(!isPerson){
|
||||||
|
queryItr.next(); // we don't want first query to execute if the ind is not a person.
|
||||||
|
}*/
|
||||||
|
query = QueryFactory.create(queryItr.next(),Syntax.syntaxARQ);
|
||||||
|
QueryExecution qexec = QueryExecutionFactory.create(query,dataset,initialBinding);
|
||||||
|
try{
|
||||||
|
ResultSet results = qexec.execSelect();
|
||||||
|
while(results.hasNext()){
|
||||||
|
soln = results.nextSolution();
|
||||||
|
|
||||||
|
adjacentIndividual = (Resource)soln.get("adjobj");
|
||||||
|
if(adjacentIndividual!=null){
|
||||||
|
adjacentNodes.add(adjacentIndividual.getURI());
|
||||||
|
}
|
||||||
|
|
||||||
|
coauthor = soln.get("coauthor");
|
||||||
|
if(coauthor!=null){
|
||||||
|
coauthorNames.add(" co-authors " + coauthor.toString() + " co-authors ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}catch(Exception e){
|
||||||
|
log.error("Error found in getAdjacentNodes method of SearchQueryHandler");
|
||||||
|
}finally{
|
||||||
|
qexec.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
queryList = null;
|
||||||
|
Iterator<String> itr = adjacentNodes.iterator();
|
||||||
|
while(itr.hasNext()){
|
||||||
|
adjacentNodesConcat.append(itr.next() + " ");
|
||||||
|
}
|
||||||
|
|
||||||
|
info[0] = adjacentNodesConcat.toString();
|
||||||
|
|
||||||
|
itr = coauthorNames.iterator();
|
||||||
|
while(itr.hasNext()){
|
||||||
|
coauthorBuff.append(itr.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
info[1] = coauthorBuff.toString();
|
||||||
|
|
||||||
|
}
|
||||||
|
catch(Throwable t){
|
||||||
|
log.error(t,t);
|
||||||
|
}finally{
|
||||||
|
dataset.getLock().leaveCriticalSection();
|
||||||
|
adjacentNodes = null;
|
||||||
|
adjacentNodesConcat = null;
|
||||||
|
coauthorBuff = null;
|
||||||
|
}
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri) {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
// calculate beta value.
|
||||||
|
log.debug("Parameter calculation starts..");
|
||||||
|
|
||||||
|
String uri = individual.getURI();
|
||||||
|
String adjInfo[] = getAdjacentNodes(uri);
|
||||||
|
StringBuffer info = new StringBuffer();
|
||||||
|
info.append(adjInfo[0]);
|
||||||
|
info.append(addUri.toString());
|
||||||
|
float phi = calculatePhi(info);
|
||||||
|
|
||||||
|
for(String term: fieldsToAddBetaTo){
|
||||||
|
SolrInputField f = doc.getField( term );
|
||||||
|
f.setBoost( getBeta(uri) + phi + IndividualToSolrDocument.NAME_BOOST);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(String term: fieldsToMultiplyBetaBy){
|
||||||
|
SolrInputField f = doc.getField( term );
|
||||||
|
f.addValue(info.toString(),getBeta(uri)*phi*IndividualToSolrDocument.ALL_TEXT_BOOST);
|
||||||
|
}
|
||||||
|
|
||||||
|
SolrInputField f = doc.getField(VitroTermNames.targetInfo);
|
||||||
|
f.addValue(adjInfo[1],f.getBoost());
|
||||||
|
doc.setDocumentBoost(getBeta(uri)*phi*IndividualToSolrDocument.ALL_TEXT_BOOST);
|
||||||
|
|
||||||
|
log.debug("Parameter calculation is done");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void clearMap(){
|
||||||
|
betaMap.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class TotalInd implements Runnable{
|
||||||
|
private Dataset dataset;
|
||||||
|
private String totalCountQuery;
|
||||||
|
private static Log log = LogFactory.getLog(TotalInd.class);
|
||||||
|
|
||||||
|
public TotalInd(Dataset dataset,String totalCountQuery){
|
||||||
|
this.dataset = dataset;
|
||||||
|
this.totalCountQuery = totalCountQuery;
|
||||||
|
|
||||||
|
}
|
||||||
|
public void run(){
|
||||||
|
int totalInd=0;
|
||||||
|
Query query;
|
||||||
|
QuerySolution soln = null;
|
||||||
|
dataset.getLock().enterCriticalSection(Lock.READ);
|
||||||
|
|
||||||
|
try{
|
||||||
|
query = QueryFactory.create(totalCountQuery,Syntax.syntaxARQ);
|
||||||
|
QueryExecution qexec = QueryExecutionFactory.create(query,dataset);
|
||||||
|
ResultSet results = qexec.execSelect();
|
||||||
|
List<String> resultVars = results.getResultVars();
|
||||||
|
|
||||||
|
if(resultVars!=null && resultVars.size()!=0){
|
||||||
|
soln = results.next();
|
||||||
|
totalInd = Integer.parseInt(soln.getLiteral(resultVars.get(0)).getLexicalForm());
|
||||||
|
}
|
||||||
|
CalculateParameters.totalInd = totalInd;
|
||||||
|
log.info("Total number of individuals in the system are : " + CalculateParameters.totalInd);
|
||||||
|
}catch(Throwable t){
|
||||||
|
log.error(t,t);
|
||||||
|
}finally{
|
||||||
|
dataset.getLock().leaveCriticalSection();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,452 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.solr.common.SolrInputField;
|
||||||
|
|
||||||
|
import com.hp.hpl.jena.query.Dataset;
|
||||||
|
import com.hp.hpl.jena.query.Query;
|
||||||
|
import com.hp.hpl.jena.query.QueryExecution;
|
||||||
|
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
||||||
|
import com.hp.hpl.jena.query.QueryFactory;
|
||||||
|
import com.hp.hpl.jena.query.QuerySolution;
|
||||||
|
import com.hp.hpl.jena.query.QuerySolutionMap;
|
||||||
|
import com.hp.hpl.jena.query.ResultSet;
|
||||||
|
import com.hp.hpl.jena.query.Syntax;
|
||||||
|
import com.hp.hpl.jena.rdf.model.RDFNode;
|
||||||
|
import com.hp.hpl.jena.rdf.model.Resource;
|
||||||
|
import com.hp.hpl.jena.rdf.model.ResourceFactory;
|
||||||
|
import com.hp.hpl.jena.shared.Lock;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
|
||||||
|
|
||||||
|
public class ContextNodeFields implements DocumentModifier{
|
||||||
|
|
||||||
|
private static final String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||||
|
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||||
|
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||||
|
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||||
|
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||||
|
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||||
|
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||||
|
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||||
|
|
||||||
|
private static final List<String> singleValuedQueriesForAgent = new ArrayList<String>();
|
||||||
|
private static final List<String> singleValuedQueriesForInformationResource = new ArrayList<String>();
|
||||||
|
private static final List<String> multiValuedQueriesForAgent = new ArrayList<String>();
|
||||||
|
private static final String multiValuedQueryForInformationResource;
|
||||||
|
// private static StringBuffer objectProperties = new StringBuffer();
|
||||||
|
|
||||||
|
private Log log = LogFactory.getLog(ContextNodeFields.class);
|
||||||
|
private Dataset dataset;
|
||||||
|
|
||||||
|
|
||||||
|
public ContextNodeFields(Dataset dataset){
|
||||||
|
this.dataset = dataset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO: consider a constructor like this:
|
||||||
|
* public ContextNodeFields(OntModel fullModel, List<String> queries )
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
*TODO:
|
||||||
|
* consider reducing the code in this class using a method like the following:
|
||||||
|
*/
|
||||||
|
public StringBuffer runQuery( Individual individual, String query ){
|
||||||
|
StringBuffer propertyValues = new StringBuffer();
|
||||||
|
|
||||||
|
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||||
|
Resource uriResource = ResourceFactory.createResource(individual.getURI());
|
||||||
|
initialBinding.add("uri", uriResource);
|
||||||
|
|
||||||
|
Query sparqlQuery = QueryFactory.create( query, Syntax.syntaxARQ);
|
||||||
|
dataset.getLock().enterCriticalSection(Lock.READ);
|
||||||
|
try{
|
||||||
|
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, dataset, initialBinding);
|
||||||
|
try{
|
||||||
|
ResultSet results = qExec.execSelect();
|
||||||
|
while(results.hasNext()){
|
||||||
|
QuerySolution soln = results.nextSolution();
|
||||||
|
Iterator<String> iter = soln.varNames() ;
|
||||||
|
while( iter.hasNext()){
|
||||||
|
String name = iter.next();
|
||||||
|
RDFNode node = soln.get( name );
|
||||||
|
if( node != null ){
|
||||||
|
propertyValues.append(" " + node.toString());
|
||||||
|
}else{
|
||||||
|
log.debug(name + " is null");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}catch(Throwable t){
|
||||||
|
log.error(t,t);
|
||||||
|
} finally{
|
||||||
|
qExec.close();
|
||||||
|
}
|
||||||
|
}finally{
|
||||||
|
dataset.getLock().leaveCriticalSection();
|
||||||
|
}
|
||||||
|
|
||||||
|
return propertyValues;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri) {
|
||||||
|
|
||||||
|
log.debug("retrieving context node values..");
|
||||||
|
|
||||||
|
SolrInputField field = doc.getField(VitroTermNames.ALLTEXT);
|
||||||
|
SolrInputField targetField = doc.getField(VitroTermNames.targetInfo);
|
||||||
|
StringBuffer objectProperties = new StringBuffer();
|
||||||
|
|
||||||
|
|
||||||
|
objectProperties.append(" ");
|
||||||
|
|
||||||
|
int threadCount = multiValuedQueriesForAgent.size();
|
||||||
|
QueryRunner[] threads = new QueryRunner[threadCount];
|
||||||
|
|
||||||
|
|
||||||
|
//Make a thread for each query and start it.
|
||||||
|
for(int i= 0; i < threadCount; i++){
|
||||||
|
QueryRunner t = new QueryRunner(individual, multiValuedQueriesForAgent.get(i));
|
||||||
|
t.start();
|
||||||
|
threads[i] = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Wait for each thread to finish and collect results
|
||||||
|
for(int i = 0 ; i < threadCount ; i++){
|
||||||
|
try {
|
||||||
|
threads[i].join();
|
||||||
|
objectProperties.append( threads[i].getPropertyValues() ) ;
|
||||||
|
threads[i] = null;
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
log.error("Thread " + threads[i].getName() + " interrupted!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
targetField.addValue(" " + runQuery(individual, multiValuedQueryForInformationResource), targetField.getBoost());
|
||||||
|
|
||||||
|
|
||||||
|
field.addValue(objectProperties, field.getBoost());
|
||||||
|
log.debug("context node values are retrieved");
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//single valued queries for foaf:Agent
|
||||||
|
static {
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||||
|
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||||
|
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||||
|
" ?c rdf:type core:Position . " +
|
||||||
|
" ?c core:hrJobTitle ?ContextNodeProperty . }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||||
|
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||||
|
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||||
|
" ?c rdf:type core:Position . " +
|
||||||
|
" ?c core:involvedOrganizationName ?ContextNodeProperty . }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||||
|
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||||
|
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||||
|
" ?c rdf:type core:Position . " +
|
||||||
|
" ?c core:positionForPerson ?f . ?f rdfs:label ?ContextNodeProperty . }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||||
|
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||||
|
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||||
|
" ?c rdf:type core:Position . " +
|
||||||
|
" ?c core:positionInOrganization ?i . ?i rdfs:label ?ContextNodeProperty . }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||||
|
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||||
|
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||||
|
" ?c rdf:type core:Position . " +
|
||||||
|
" ?c core:titleOrRole ?ContextNodeProperty . }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||||
|
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||||
|
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||||
|
" ?c rdf:type core:Relationship . " +
|
||||||
|
" ?c core:advisee ?d . ?d rdfs:label ?ContextNodeProperty . }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||||
|
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||||
|
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||||
|
" ?c rdf:type core:Relationship . " +
|
||||||
|
" ?c core:degreeCandidacy ?e . ?e rdfs:label ?ContextNodeProperty . }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||||
|
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||||
|
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||||
|
" ?c rdf:type core:Relationship . " +
|
||||||
|
" ?c core:linkedAuthor ?f . ?f rdfs:label ?ContextNodeProperty . }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||||
|
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||||
|
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||||
|
" ?c rdf:type core:Relationship . " +
|
||||||
|
" ?c core:linkedInformationResource ?h . ?h rdfs:label ?ContextNodeProperty . }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||||
|
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||||
|
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||||
|
" ?c rdf:type core:AwardReceipt . " +
|
||||||
|
" ?c core:awardConferredBy ?d . ?d rdfs:label ?ContextNodeProperty . }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||||
|
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||||
|
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||||
|
" ?c rdf:type core:AwardReceipt . " +
|
||||||
|
" ?c core:awardOrHonorFor ?e . ?e rdfs:label ?ContextNodeProperty . }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT " +
|
||||||
|
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
|
||||||
|
"?uri rdf:type foaf:Agent ; ?b ?c . " +
|
||||||
|
" ?c rdf:type core:AwardReceipt . " +
|
||||||
|
" ?c core:description ?ContextNodeProperty . }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT DISTINCT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||||
|
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||||
|
+ " ?c rdf:type core:Role ; core:roleIn ?Organization ."
|
||||||
|
+ " ?Organization rdfs:label ?ContextNodeProperty . "
|
||||||
|
+ " } ORDER BY ?ContextNodeProperty ");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||||
|
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||||
|
+ " ?c rdf:type core:EducationalTraining . "
|
||||||
|
+ " ?c core:degreeEarned ?d . ?d rdfs:label ?ContextNodeProperty ."
|
||||||
|
+ " }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||||
|
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||||
|
+ " ?c rdf:type core:EducationalTraining . "
|
||||||
|
+ " ?c core:degreeEarned ?d . ?d core:abbreviation ?ContextNodeProperty ."
|
||||||
|
+ " }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||||
|
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||||
|
+ " ?c rdf:type core:EducationalTraining . "
|
||||||
|
+ " ?c core:majorField ?ContextNodeProperty ."
|
||||||
|
+ " }");
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||||
|
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||||
|
+ " ?c rdf:type core:EducationalTraining . "
|
||||||
|
+ " ?c core:departmentOrSchool ?ContextNodeProperty ."
|
||||||
|
+ " }");
|
||||||
|
|
||||||
|
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||||
|
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||||
|
+ " ?c rdf:type core:EducationalTraining . "
|
||||||
|
+ " ?c core:trainingAtOrganization ?e . ?e rdfs:label ?ContextNodeProperty . "
|
||||||
|
+ " }");
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//single valued queries for core:InformationResource
|
||||||
|
static {
|
||||||
|
singleValuedQueriesForInformationResource.add(prefix +
|
||||||
|
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||||
|
+ " ?uri rdf:type core:InformationResource . "
|
||||||
|
+ "?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ."
|
||||||
|
+ "?b rdfs:label ?ContextNodeProperty .}");
|
||||||
|
|
||||||
|
singleValuedQueriesForInformationResource.add(prefix +
|
||||||
|
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||||
|
+ " ?uri rdf:type core:InformationResource . "
|
||||||
|
+ " ?uri core:linkedInformationResource ?d ."
|
||||||
|
+ " ?d rdfs:label ?ContextNodeProperty . }");
|
||||||
|
|
||||||
|
singleValuedQueriesForInformationResource.add(prefix +
|
||||||
|
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||||
|
+ "?uri rdf:type core:InformationResource . "
|
||||||
|
+ "?uri core:features ?i . ?i rdfs:label ?ContextNodeProperty ."
|
||||||
|
+ "}");
|
||||||
|
|
||||||
|
singleValuedQueriesForInformationResource.add(prefix +
|
||||||
|
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||||
|
+ "?uri rdf:type core:InformationResource . "
|
||||||
|
+ "?uri bibo:editor ?e . ?e rdfs:label ?ContextNodeProperty ."
|
||||||
|
+ "}");
|
||||||
|
|
||||||
|
singleValuedQueriesForInformationResource.add(prefix +
|
||||||
|
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||||
|
+ "?uri rdf:type core:InformationResource . "
|
||||||
|
+ "?uri core:hasSubjectArea ?f . ?f rdfs:label ?ContextNodeProperty ."
|
||||||
|
+ "}");
|
||||||
|
|
||||||
|
singleValuedQueriesForInformationResource.add(prefix +
|
||||||
|
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
|
||||||
|
+ "?uri rdf:type core:InformationResource . "
|
||||||
|
+ "?uri core:hasSubjectArea ?f . ?f core:researchAreaOf ?h . ?h rdfs:label ?ContextNodeProperty ."
|
||||||
|
+ "}");
|
||||||
|
}
|
||||||
|
|
||||||
|
//multi valued queries
|
||||||
|
|
||||||
|
static{
|
||||||
|
multiValuedQueriesForAgent.add(prefix +
|
||||||
|
"SELECT " +
|
||||||
|
"(str(?HRJobTitle) as ?hrJobTitle) (str(?InvolvedOrganizationName) as ?involvedOrganizationName) " +
|
||||||
|
" (str(?PositionForPerson) as ?positionForPerson) (str(?PositionInOrganization) as ?positionInOrganization) " +
|
||||||
|
" (str(?TitleOrRole) as ?titleOrRole) WHERE {"
|
||||||
|
|
||||||
|
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||||
|
+ " ?c rdf:type core:Position . "
|
||||||
|
|
||||||
|
+ " OPTIONAL { ?c core:hrJobTitle ?HRJobTitle . } . "
|
||||||
|
+ " OPTIONAL { ?c core:involvedOrganizationName ?InvolvedOrganizationName . } ."
|
||||||
|
+ " OPTIONAL { ?c core:positionForPerson ?f . ?f rdfs:label ?PositionForPerson . } . "
|
||||||
|
+ " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . "
|
||||||
|
+ " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . "
|
||||||
|
+ " }");
|
||||||
|
|
||||||
|
multiValuedQueriesForAgent.add(prefix +
|
||||||
|
"SELECT (str(?Advisee) as ?advisee) (str(?DegreeCandidacy) as ?degreeCandidacy) " +
|
||||||
|
" (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) WHERE {"
|
||||||
|
|
||||||
|
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||||
|
+ " ?c rdf:type core:Relationship . "
|
||||||
|
|
||||||
|
+ " OPTIONAL { ?c core:advisee ?d . ?d rdfs:label ?Advisee . } . "
|
||||||
|
+ " OPTIONAL { ?c core:degreeCandidacy ?e . ?e rdfs:label ?DegreeCandidacy . } ."
|
||||||
|
+ " OPTIONAL { ?c core:linkedAuthor ?f . ?f rdfs:label ?LinkedAuthor . } . "
|
||||||
|
+ " OPTIONAL { ?c core:linkedInformationResource ?h . ?h rdfs:label ?LinkedInformationResource . } . "
|
||||||
|
|
||||||
|
+ " } ");
|
||||||
|
|
||||||
|
multiValuedQueriesForAgent.add(prefix +
|
||||||
|
"SELECT (str(?AwardConferredBy) as ?awardConferredBy) (str(?AwardOrHonorFor) as ?awardOrHonorFor) " +
|
||||||
|
" (str(?Description) as ?description) WHERE {"
|
||||||
|
|
||||||
|
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||||
|
+ " ?c rdf:type core:AwardReceipt . "
|
||||||
|
|
||||||
|
+ " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy } . "
|
||||||
|
+ " OPTIONAL { ?c core:awardOrHonorFor ?e . ?e rdfs:label ?AwardOrHonorFor } ."
|
||||||
|
+ " OPTIONAL { ?c core:description ?Description . } . "
|
||||||
|
+ " }");
|
||||||
|
|
||||||
|
multiValuedQueriesForAgent.add(prefix +
|
||||||
|
"SELECT (str(?OrganizationLabel) as ?organizationLabel) WHERE {"
|
||||||
|
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||||
|
+ " ?c rdf:type core:Role ; core:roleIn ?Organization ."
|
||||||
|
+ " ?Organization rdfs:label ?OrganizationLabel . "
|
||||||
|
+ " }");
|
||||||
|
|
||||||
|
multiValuedQueriesForAgent.add(prefix +
|
||||||
|
"SELECT (str(?AcademicDegreeLabel) as ?academicDegreeLabel) (str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) "
|
||||||
|
+ "(str(?MajorField) as ?majorField) (str(?DepartmentOrSchool) as ?departmentOrSchool) " +
|
||||||
|
"(str(?TrainingAtOrganizationLabel) as ?trainingAtOrganizationLabel) WHERE {"
|
||||||
|
|
||||||
|
+ " ?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||||
|
+ " ?c rdf:type core:EducationalTraining . "
|
||||||
|
|
||||||
|
+ "OPTIONAL { ?c core:degreeEarned ?d . ?d rdfs:label ?AcademicDegreeLabel ; core:abbreviation ?AcademicDegreeAbbreviation . } . "
|
||||||
|
+ "OPTIONAL { ?c core:majorField ?MajorField .} ."
|
||||||
|
+ " OPTIONAL { ?c core:departmentOrSchool ?DepartmentOrSchool . }"
|
||||||
|
+ " OPTIONAL { ?c core:trainingAtOrganization ?e . ?e rdfs:label ?TrainingAtOrganizationLabel . } . "
|
||||||
|
|
||||||
|
+"}");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//multivalued query for core:InformationResource
|
||||||
|
static {
|
||||||
|
|
||||||
|
multiValuedQueryForInformationResource = prefix +
|
||||||
|
"SELECT (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) "
|
||||||
|
+ "(str(?Editor) as ?editor) (str(?SubjectArea) as ?subjectArea) (str(?ResearchAreaOf) as ?researchAreaOf) " +
|
||||||
|
"(str(?Features) as ?features) WHERE {"
|
||||||
|
|
||||||
|
+ " ?uri rdf:type core:InformationResource . "
|
||||||
|
|
||||||
|
+ "OPTIONAL { ?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ; core:linkedInformationResource ?d ." +
|
||||||
|
"?b rdfs:label ?LinkedAuthor . ?d rdfs:label ?LinkedInformationResource } . "
|
||||||
|
+ "OPTIONAL { ?uri bibo:editor ?e . ?e rdfs:label ?Editor . } ."
|
||||||
|
+ " OPTIONAL { ?uri core:hasSubjectArea ?f . ?f rdfs:label ?SubjectArea ; core:researchAreaOf ?h . ?h rdfs:label ?ResearchAreaOf . } "
|
||||||
|
+ " OPTIONAL { ?uri core:features ?i . ?i rdfs:label ?Features . } . "
|
||||||
|
|
||||||
|
+"}" ;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private class QueryRunner extends Thread{
|
||||||
|
|
||||||
|
private Individual ind;
|
||||||
|
private String query;
|
||||||
|
private StringBuffer propertyValues = new StringBuffer();
|
||||||
|
|
||||||
|
public String getPropertyValues(){
|
||||||
|
return propertyValues.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public QueryRunner(Individual ind, String query){
|
||||||
|
this.ind = ind;
|
||||||
|
this.query = query;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void run(){
|
||||||
|
// StringBuffer propertyValues = new StringBuffer();
|
||||||
|
|
||||||
|
propertyValues.append(runQuery(ind, query));
|
||||||
|
|
||||||
|
|
||||||
|
// QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||||
|
// Resource uriResource = ResourceFactory.createResource(ind.getURI());
|
||||||
|
// initialBinding.add("uri", uriResource);
|
||||||
|
//
|
||||||
|
// Query sparqlQuery = QueryFactory.create( query, Syntax.syntaxARQ);
|
||||||
|
// dataset.getLock().enterCriticalSection(Lock.READ);
|
||||||
|
// try{
|
||||||
|
// QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, dataset, initialBinding);
|
||||||
|
// try{
|
||||||
|
// ResultSet results = qExec.execSelect();
|
||||||
|
// while(results.hasNext()){
|
||||||
|
// QuerySolution soln = results.nextSolution();
|
||||||
|
// Iterator<String> iter = soln.varNames() ;
|
||||||
|
// while( iter.hasNext()){
|
||||||
|
// String name = iter.next();
|
||||||
|
// RDFNode node = soln.get( name );
|
||||||
|
// if( node != null ){
|
||||||
|
// propertyValues.append(" " + node.toString());
|
||||||
|
// }else{
|
||||||
|
// log.debug(name + " is null");
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }catch(Throwable t){
|
||||||
|
// log.error(t,t);
|
||||||
|
// } finally{
|
||||||
|
// qExec.close();
|
||||||
|
// }
|
||||||
|
// }finally{
|
||||||
|
// dataset.getLock().leaveCriticalSection();
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
//objectProperties.append(propertyValues.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,15 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||||
|
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This interface represents an object that can add to a SolrInputDocument.
|
||||||
|
*/
|
||||||
|
public interface DocumentModifier {
|
||||||
|
public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri);
|
||||||
|
|
||||||
|
}
|
|
@ -2,46 +2,330 @@
|
||||||
|
|
||||||
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||||
|
|
||||||
import org.apache.solr.common.SolrDocument;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.joda.time.DateTime;
|
||||||
|
|
||||||
|
import com.hp.hpl.jena.vocabulary.OWL;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.ClassProhibitedFromSearch;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||||
|
|
||||||
public class IndividualToSolrDocument implements Obj2DocIface {
|
public class IndividualToSolrDocument {
|
||||||
|
|
||||||
protected LuceneDocToSolrDoc luceneToSolr;
|
public static final Log log = LogFactory.getLog(IndividualToSolrDocument.class.getName());
|
||||||
protected Entity2LuceneDoc entityToLucene;
|
|
||||||
|
|
||||||
public IndividualToSolrDocument(Entity2LuceneDoc e2d){
|
public static VitroTermNames term = new VitroTermNames();
|
||||||
entityToLucene = e2d;
|
|
||||||
luceneToSolr = new LuceneDocToSolrDoc();
|
private static String entClassName = Individual.class.getName();
|
||||||
|
|
||||||
|
private ClassProhibitedFromSearch classesProhibitedFromSearch;
|
||||||
|
|
||||||
|
private IndividualProhibitedFromSearch individualProhibitedFromSearch;
|
||||||
|
|
||||||
|
public List<DocumentModifier> documentModifiers = new ArrayList<DocumentModifier>();
|
||||||
|
|
||||||
|
private static List<String> contextNodeClassNames = new ArrayList<String>();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
public IndividualToSolrDocument(
|
||||||
|
ClassProhibitedFromSearch classesProhibitedFromSearch,
|
||||||
|
IndividualProhibitedFromSearch individualProhibitedFromSearch){
|
||||||
|
|
||||||
|
this( classesProhibitedFromSearch,
|
||||||
|
individualProhibitedFromSearch,
|
||||||
|
Collections.EMPTY_LIST);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
public IndividualToSolrDocument(
|
||||||
public boolean canTranslate(Object obj) {
|
ClassProhibitedFromSearch classesProhibitedFromSearch,
|
||||||
return obj != null && obj instanceof Individual;
|
IndividualProhibitedFromSearch individualProhibitedFromSearch,
|
||||||
|
List<DocumentModifier> docModifiers){
|
||||||
|
this.classesProhibitedFromSearch = classesProhibitedFromSearch;
|
||||||
|
this.individualProhibitedFromSearch = individualProhibitedFromSearch;
|
||||||
|
this.documentModifiers = docModifiers;
|
||||||
|
fillContextNodes();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@SuppressWarnings("static-access")
|
||||||
public boolean canUnTranslate(Object result) {
|
public SolrInputDocument translate(Individual ind) throws IndexingException{
|
||||||
return result != null && result instanceof SolrDocument;
|
long tProhibited = System.currentTimeMillis();
|
||||||
|
ArrayList<String> superClassNames = null;
|
||||||
|
StringBuffer addUri = null;
|
||||||
|
String value;
|
||||||
|
StringBuffer classPublicNames = new StringBuffer();
|
||||||
|
classPublicNames.append("");
|
||||||
|
SolrInputDocument doc = new SolrInputDocument();
|
||||||
|
|
||||||
|
//DocId
|
||||||
|
String id = ind.getURI();
|
||||||
|
log.debug("translating " + id);
|
||||||
|
|
||||||
|
if(id == null){
|
||||||
|
log.debug("cannot add individuals without URIs to lucene Index");
|
||||||
|
return null;
|
||||||
|
}else if( id.startsWith(VitroVocabulary.vitroURI) ||
|
||||||
|
id.startsWith(VitroVocabulary.VITRO_PUBLIC) ||
|
||||||
|
id.startsWith(VitroVocabulary.PSEUDO_BNODE_NS) ||
|
||||||
|
id.startsWith(OWL.NS)){
|
||||||
|
log.debug("not indexing because of namespace:" + id);
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
//filter out class groups, owl:ObjectProperties etc..
|
||||||
|
if(individualProhibitedFromSearch.isIndividualProhibited(id)){
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
log.debug("time to check if individual is prohibited:" + Long.toString(System.currentTimeMillis() - tProhibited));
|
||||||
|
|
||||||
|
// Types and classgroups
|
||||||
|
boolean prohibited = false;
|
||||||
|
List<VClass> vclasses = ind.getVClasses(false);
|
||||||
|
superClassNames = new ArrayList<String>();
|
||||||
|
String superLclName = null;
|
||||||
|
long tClassgroup = System.currentTimeMillis();
|
||||||
|
for(VClass clz : vclasses){
|
||||||
|
superLclName = clz.getLocalName();
|
||||||
|
superClassNames.add(superLclName);
|
||||||
|
if(clz.getURI() == null){
|
||||||
|
continue;
|
||||||
|
}else if(OWL.Thing.getURI().equals(clz.getURI())){
|
||||||
|
//index individuals of type owl:Thing, just don't add owl:Thing as the type field in the index
|
||||||
|
continue;
|
||||||
|
} else if(clz.getURI().startsWith(OWL.NS)){
|
||||||
|
log.debug("not indexing " + id + " because of type " + clz.getURI());
|
||||||
|
return null;
|
||||||
|
} else if(contextNodeClassNames.contains(superLclName)) { // check to see if context node is being indexed.
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if( !prohibited && classesProhibitedFromSearch.isClassProhibitedFromSearch(clz.getURI()))
|
||||||
|
prohibited = true;
|
||||||
|
if( clz.getSearchBoost() != null)
|
||||||
|
doc.setDocumentBoost(doc.getDocumentBoost() + clz.getSearchBoost());
|
||||||
|
|
||||||
|
doc.addField(term.RDFTYPE, clz.getURI());
|
||||||
|
|
||||||
|
if(clz.getLocalName() != null){
|
||||||
|
doc.addField(term.CLASSLOCALNAME, clz.getLocalName());
|
||||||
|
doc.addField(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase());
|
||||||
|
}
|
||||||
|
|
||||||
|
if(clz.getName() != null){
|
||||||
|
classPublicNames.append(" ");
|
||||||
|
classPublicNames.append(clz.getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
//Classgroup URI
|
||||||
|
if(clz.getGroupURI() != null){
|
||||||
|
doc.addField(term.CLASSGROUP_URI,clz.getGroupURI());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(superClassNames.isEmpty()){
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
log.debug("time to check if class is prohibited and adding classes, classgroups and type to the index: " + Long.toString(System.currentTimeMillis() - tClassgroup));
|
||||||
|
|
||||||
|
|
||||||
|
doc.addField(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0");
|
||||||
|
|
||||||
|
//lucene DocID
|
||||||
|
doc.addField(term.DOCID, entClassName + id);
|
||||||
|
|
||||||
|
//vitro id
|
||||||
|
doc.addField(term.URI, id);
|
||||||
|
|
||||||
|
//java class
|
||||||
|
doc.addField(term.JCLASS, entClassName);
|
||||||
|
|
||||||
|
//Individual Label
|
||||||
|
if(ind.getRdfsLabel() != null)
|
||||||
|
value = ind.getRdfsLabel();
|
||||||
|
else{
|
||||||
|
log.debug("Using local name for individual with rdfs:label " + ind.getURI());
|
||||||
|
value = ind.getLocalName();
|
||||||
|
}
|
||||||
|
|
||||||
|
// collecting object property statements
|
||||||
|
|
||||||
|
String uri = ind.getURI();
|
||||||
|
StringBuffer objectNames = new StringBuffer();
|
||||||
|
objectNames.append("");
|
||||||
|
String t=null;
|
||||||
|
addUri = new StringBuffer();
|
||||||
|
addUri.append("");
|
||||||
|
List<ObjectPropertyStatement> objectPropertyStatements = ind.getObjectPropertyStatements();
|
||||||
|
if (objectPropertyStatements != null) {
|
||||||
|
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
|
||||||
|
while (objectPropertyStmtIter.hasNext()) {
|
||||||
|
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
|
||||||
|
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
|
||||||
|
continue;
|
||||||
|
try {
|
||||||
|
objectNames.append(" ");
|
||||||
|
objectNames.append(((t=objectPropertyStmt.getObject().getName()) == null)?"":t);
|
||||||
|
addUri.append(" ");
|
||||||
|
addUri.append(((t=objectPropertyStmt.getObject().getURI()) == null)?"":t);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("could not index name of related object: " + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(documentModifiers == null || documentModifiers.isEmpty()){
|
||||||
|
doc.addField(term.NAME_RAW, value, NAME_BOOST);
|
||||||
|
doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST);
|
||||||
|
doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST);
|
||||||
|
doc.addField(term.NAME_STEMMED, value, NAME_BOOST);
|
||||||
|
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
|
||||||
|
}else{
|
||||||
|
doc.addField(term.NAME_RAW, value);
|
||||||
|
doc.addField(term.NAME_LOWERCASE, value.toLowerCase());
|
||||||
|
doc.addField(term.NAME_UNSTEMMED, value);
|
||||||
|
doc.addField(term.NAME_STEMMED, value);
|
||||||
|
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
long tMoniker = System.currentTimeMillis();
|
||||||
|
|
||||||
|
if(documentModifiers == null || documentModifiers.isEmpty()){
|
||||||
|
//boost for entity
|
||||||
|
if(ind.getSearchBoost() != null && ind.getSearchBoost() != 0)
|
||||||
|
doc.setDocumentBoost(ind.getSearchBoost());
|
||||||
|
}
|
||||||
|
|
||||||
|
//thumbnail
|
||||||
|
try{
|
||||||
|
value = null;
|
||||||
|
if(ind.hasThumb())
|
||||||
|
doc.addField(term.THUMBNAIL, "1");
|
||||||
|
else
|
||||||
|
doc.addField(term.THUMBNAIL, "0");
|
||||||
|
}catch(Exception ex){
|
||||||
|
log.debug("could not index thumbnail: " + ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//time of index in millis past epoc
|
||||||
|
Object anon[] = { new Long((new DateTime() ).getMillis()) };
|
||||||
|
doc.addField(term.INDEXEDTIME, String.format("%019d", anon));
|
||||||
|
|
||||||
|
log.debug("time to include thumbnail and indexedtime in the index: " + Long.toString(System.currentTimeMillis() - tMoniker));
|
||||||
|
|
||||||
|
long tPropertyStatements = System.currentTimeMillis();
|
||||||
|
|
||||||
|
//collecting data property statements
|
||||||
|
|
||||||
|
if(!prohibited){
|
||||||
|
//ALLTEXT, all of the 'full text'
|
||||||
|
StringBuffer allTextValue = new StringBuffer();
|
||||||
|
allTextValue.append("");
|
||||||
|
allTextValue.append(" ");
|
||||||
|
allTextValue.append(((t=ind.getName()) == null)?"":t);
|
||||||
|
allTextValue.append(" ");
|
||||||
|
allTextValue.append(((t=ind.getAnchor()) == null)?"":t);
|
||||||
|
allTextValue.append(" ");
|
||||||
|
allTextValue.append(classPublicNames.toString());
|
||||||
|
|
||||||
|
List<DataPropertyStatement> dataPropertyStatements = ind.getDataPropertyStatements();
|
||||||
|
if (dataPropertyStatements != null) {
|
||||||
|
Iterator<DataPropertyStatement> dataPropertyStmtIter = dataPropertyStatements.iterator();
|
||||||
|
while (dataPropertyStmtIter.hasNext()) {
|
||||||
|
DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next();
|
||||||
|
allTextValue.append(" ");
|
||||||
|
allTextValue.append(((t=dataPropertyStmt.getData()) == null)?"":t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
allTextValue.append(objectNames.toString());
|
||||||
|
|
||||||
|
log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements));
|
||||||
|
|
||||||
|
String alltext = allTextValue.toString();
|
||||||
|
doc.addField(term.ALLTEXT, alltext);
|
||||||
|
doc.addField(term.ALLTEXTUNSTEMMED, alltext);
|
||||||
|
doc.addField(term.ALLTEXT_PHONETIC, alltext,PHONETIC_BOOST);
|
||||||
|
|
||||||
|
//run the document modifiers
|
||||||
|
if( documentModifiers != null && !documentModifiers.isEmpty()){
|
||||||
|
doc.addField(term.targetInfo,"");
|
||||||
|
for(DocumentModifier modifier: documentModifiers){
|
||||||
|
modifier.modifyDocument(ind, doc, addUri);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public Object getIndexId(Object obj) {
|
public Object getIndexId(Object obj) {
|
||||||
throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented");
|
throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
public Individual unTranslate(Object result) {
|
||||||
public Object translate(Object obj) throws IndexingException {
|
Individual ent = null;
|
||||||
return luceneToSolr.translate( entityToLucene.translate( obj ) );
|
if( result != null && result instanceof Document){
|
||||||
|
Document hit = (Document) result;
|
||||||
|
String id = hit.get(term.URI);
|
||||||
|
ent = new IndividualImpl();
|
||||||
|
ent.setURI(id);
|
||||||
|
}
|
||||||
|
return ent;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
private void fillContextNodes(){
|
||||||
public Object unTranslate(Object result) {
|
this.contextNodeClassNames.add("Role");
|
||||||
return luceneToSolr.unTranslate( result );
|
this.contextNodeClassNames.add("AttendeeRole");
|
||||||
|
this.contextNodeClassNames.add("ClinicalRole");
|
||||||
|
this.contextNodeClassNames.add("LeaderRole");
|
||||||
|
this.contextNodeClassNames.add("MemberRole");
|
||||||
|
this.contextNodeClassNames.add("OutreachProviderRole");
|
||||||
|
this.contextNodeClassNames.add("PresenterRole");
|
||||||
|
this.contextNodeClassNames.add("ResearcherRole");
|
||||||
|
this.contextNodeClassNames.add("InvestigatorRole");
|
||||||
|
this.contextNodeClassNames.add("CoPrincipalInvestigatorRole");
|
||||||
|
this.contextNodeClassNames.add("PrincipalInvestigatorRole");
|
||||||
|
this.contextNodeClassNames.add("ServiceProviderRole");
|
||||||
|
this.contextNodeClassNames.add("TeacherRole");
|
||||||
|
this.contextNodeClassNames.add("Position");
|
||||||
|
this.contextNodeClassNames.add("FacultyAdministrativePosition");
|
||||||
|
this.contextNodeClassNames.add("FacultyPosition");
|
||||||
|
this.contextNodeClassNames.add("LibrarianPosition");
|
||||||
|
this.contextNodeClassNames.add("Non-AcademicPosition");
|
||||||
|
this.contextNodeClassNames.add("Non-FacultyAcademicPosition");
|
||||||
|
this.contextNodeClassNames.add("PostdoctoralPosition");
|
||||||
|
this.contextNodeClassNames.add("AdvisingRelationship");
|
||||||
|
this.contextNodeClassNames.add("Authorship");
|
||||||
|
this.contextNodeClassNames.add("AcademicDegree");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static float NAME_BOOST = 2.0F;
|
||||||
|
public static float ALL_TEXT_BOOST = 2.5F;
|
||||||
|
public static float PHONETIC_BOOST = 0.1F;
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,62 +0,0 @@
|
||||||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
|
||||||
|
|
||||||
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
import org.apache.solr.common.SolrDocument;
|
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Translate a lucene Document into a SolrDocument.
|
|
||||||
*/
|
|
||||||
public class LuceneDocToSolrDoc implements Obj2DocIface {
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean canTranslate(Object obj) {
|
|
||||||
return obj != null && obj instanceof Document;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean canUnTranslate(Object result) {
|
|
||||||
return result != null && result instanceof SolrDocument;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Object getIndexId(Object obj) {
|
|
||||||
//"this method isn't useful for solr"
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Object translate(Object obj) throws IndexingException {
|
|
||||||
Document luceneDoc = (Document)obj;
|
|
||||||
SolrInputDocument solrDoc = new SolrInputDocument();
|
|
||||||
|
|
||||||
for( Object f : luceneDoc.getFields()){
|
|
||||||
Field field = (Field)f;
|
|
||||||
solrDoc.addField( new String(field.name()), field.stringValue() );
|
|
||||||
}
|
|
||||||
return solrDoc;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Object unTranslate(Object result) {
|
|
||||||
Individual ind = null;
|
|
||||||
if( result != null && result instanceof SolrDocument){
|
|
||||||
SolrDocument hit = (SolrDocument)result;
|
|
||||||
String id = (String) hit.getFieldValue(Entity2LuceneDoc.term.URI);
|
|
||||||
ind = new IndividualImpl();
|
|
||||||
ind.setURI(id);
|
|
||||||
}
|
|
||||||
return ind;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -6,37 +6,36 @@ import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.solr.client.solrj.SolrServer;
|
import org.apache.solr.client.solrj.SolrServer;
|
||||||
import org.apache.solr.client.solrj.SolrServerException;
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
import org.apache.solr.common.SolrDocument;
|
import org.apache.solr.client.solrj.response.UpdateResponse;
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface;
|
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.solr.CalculateParameters;
|
||||||
|
|
||||||
public class SolrIndexer implements IndexerIface {
|
public class SolrIndexer implements IndexerIface {
|
||||||
private final static Log log = LogFactory.getLog(SolrIndexer.class);
|
private final static Log log = LogFactory.getLog(SolrIndexer.class);
|
||||||
|
|
||||||
protected SolrServer server;
|
protected SolrServer server;
|
||||||
protected boolean indexing;
|
protected boolean indexing;
|
||||||
protected List<Obj2DocIface> obj2DocList;
|
|
||||||
protected HashSet<String> urisIndexed;
|
protected HashSet<String> urisIndexed;
|
||||||
|
protected IndividualToSolrDocument individualToSolrDoc;
|
||||||
|
|
||||||
public SolrIndexer( SolrServer server, List<Obj2DocIface> o2d){
|
/**
 * Creates an indexer that sends documents to the given Solr server.
 *
 * @param server   Solr server that documents are added and committed to
 * @param indToDoc translator that turns each individual into a Solr document
 */
public SolrIndexer(SolrServer server, IndividualToSolrDocument indToDoc) {
    this.individualToSolrDoc = indToDoc;
    this.server = server;
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public synchronized void index(Individual ind, boolean newDoc) throws IndexingException {
|
public void index(Individual ind) throws IndexingException {
|
||||||
|
|
||||||
if( ! indexing )
|
if( ! indexing )
|
||||||
throw new IndexingException("SolrIndexer: must call " +
|
throw new IndexingException("SolrIndexer: must call " +
|
||||||
"startIndexing() before index().");
|
"startIndexing() before index().");
|
||||||
|
@ -49,38 +48,31 @@ public class SolrIndexer implements IndexerIface {
|
||||||
log.debug("already indexed " + ind.getURI() );
|
log.debug("already indexed " + ind.getURI() );
|
||||||
return;
|
return;
|
||||||
}else{
|
}else{
|
||||||
|
SolrInputDocument solrDoc = null;
|
||||||
|
synchronized(this){
|
||||||
urisIndexed.add(ind.getURI());
|
urisIndexed.add(ind.getURI());
|
||||||
|
}
|
||||||
log.debug("indexing " + ind.getURI());
|
log.debug("indexing " + ind.getURI());
|
||||||
Iterator<Obj2DocIface> it = getObj2DocList().iterator();
|
// synchronized(individualToSolrDoc){
|
||||||
while (it.hasNext()) {
|
solrDoc = individualToSolrDoc.translate(ind);
|
||||||
Obj2DocIface obj2doc = (Obj2DocIface) it.next();
|
// }
|
||||||
if (obj2doc.canTranslate(ind)) {
|
|
||||||
SolrInputDocument solrDoc = (SolrInputDocument) obj2doc.translate(ind);
|
|
||||||
if( solrDoc != null){
|
if( solrDoc != null){
|
||||||
//sending each doc individually is inefficient
|
//sending each doc individually is inefficient
|
||||||
Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
|
// Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
|
||||||
docs.add( solrDoc );
|
// docs.add( solrDoc );
|
||||||
server.add( docs );
|
UpdateResponse res = server.add( solrDoc );
|
||||||
// if( !newDoc ){
|
log.debug("response after adding docs to server: "+ res);
|
||||||
// server.add( docs );
|
|
||||||
// log.debug("updated " + ind.getName() + " " + ind.getURI());
|
|
||||||
// }else{
|
|
||||||
// server.add( docs );
|
|
||||||
// log.debug("added " + ind.getName() + " " + ind.getURI());
|
|
||||||
// }
|
|
||||||
}else{
|
}else{
|
||||||
log.debug("removing from index " + ind.getURI());
|
log.debug("removing from index " + ind.getURI());
|
||||||
|
//TODO: how do we delete document?
|
||||||
//writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
|
//writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
throw new IndexingException(ex.getMessage());
|
throw new IndexingException(ex.getMessage());
|
||||||
} catch (SolrServerException ex) {
|
} catch (SolrServerException ex) {
|
||||||
throw new IndexingException(ex.getMessage());
|
throw new IndexingException(ex.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -115,12 +107,12 @@ public class SolrIndexer implements IndexerIface {
|
||||||
|
|
||||||
|
|
||||||
public synchronized void addObj2Doc(Obj2DocIface o2d) {
|
public synchronized void addObj2Doc(Obj2DocIface o2d) {
|
||||||
if (o2d != null)
|
//no longer used
|
||||||
obj2DocList.add(o2d);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized List<Obj2DocIface> getObj2DocList() {
|
public synchronized List<Obj2DocIface> getObj2DocList() {
|
||||||
return obj2DocList;
|
//no longer used
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -131,11 +123,21 @@ public class SolrIndexer implements IndexerIface {
|
||||||
@Override
|
@Override
|
||||||
public synchronized void endIndexing() {
|
public synchronized void endIndexing() {
|
||||||
try {
|
try {
|
||||||
server.commit();
|
UpdateResponse res = server.commit();
|
||||||
} catch (Exception e) {
|
log.debug("Response after committing to server: "+ res );
|
||||||
|
} catch (SolrServerException e) {
|
||||||
log.error("Could not commit to solr server", e);
|
log.error("Could not commit to solr server", e);
|
||||||
|
} catch(IOException e){
|
||||||
|
log.error("Could not commit to solr server", e);
|
||||||
|
}finally{
|
||||||
|
if(!individualToSolrDoc.documentModifiers.isEmpty()){
|
||||||
|
if(individualToSolrDoc.documentModifiers.get(0) instanceof CalculateParameters){
|
||||||
|
CalculateParameters c = (CalculateParameters) individualToSolrDoc.documentModifiers.get(0);
|
||||||
|
c.clearMap();
|
||||||
|
log.info("BetaMap cleared");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
server.optimize();
|
server.optimize();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
|
@ -14,6 +14,7 @@ import org.apache.solr.client.solrj.SolrServer;
|
||||||
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
|
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
|
||||||
|
|
||||||
import com.hp.hpl.jena.ontology.OntModel;
|
import com.hp.hpl.jena.ontology.OntModel;
|
||||||
|
import com.hp.hpl.jena.query.Dataset;
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
|
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
|
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
|
||||||
|
@ -21,14 +22,14 @@ import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering;
|
import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
|
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.dao.jena.JenaBaseDao;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
import edu.cornell.mannlib.vitro.webapp.dao.jena.WebappDaoFactoryJena;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
|
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
|
||||||
import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
|
import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
|
||||||
|
|
||||||
|
@ -57,8 +58,8 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
|
||||||
}
|
}
|
||||||
CommonsHttpSolrServer server;
|
CommonsHttpSolrServer server;
|
||||||
server = new CommonsHttpSolrServer( solrServerUrl );
|
server = new CommonsHttpSolrServer( solrServerUrl );
|
||||||
server.setSoTimeout(1000); // socket read timeout
|
server.setSoTimeout(10000); // socket read timeout
|
||||||
server.setConnectionTimeout(100);
|
server.setConnectionTimeout(10000);
|
||||||
server.setDefaultMaxConnectionsPerHost(100);
|
server.setDefaultMaxConnectionsPerHost(100);
|
||||||
server.setMaxTotalConnections(100);
|
server.setMaxTotalConnections(100);
|
||||||
server.setMaxRetries(1);
|
server.setMaxRetries(1);
|
||||||
|
@ -67,15 +68,24 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
|
||||||
/* setup the individual to solr doc translation */
|
/* setup the individual to solr doc translation */
|
||||||
//first we need a ent2luceneDoc translator
|
//first we need a ent2luceneDoc translator
|
||||||
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
||||||
Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc(
|
|
||||||
|
OntModel abox = ModelContext.getBaseOntModelSelector(context).getABoxModel();
|
||||||
|
|
||||||
|
OntModel inferences = (OntModel)context.getAttribute( JenaBaseDao.INFERENCE_ONT_MODEL_ATTRIBUTE_NAME);
|
||||||
|
Dataset dataset = WebappDaoFactoryJena.makeInMemoryDataset(abox, inferences);
|
||||||
|
|
||||||
|
List<DocumentModifier> modifiers = new ArrayList<DocumentModifier>();
|
||||||
|
// modifiers.add(new CalculateParameters(ModelContext.getJenaOntModel(context)));
|
||||||
|
modifiers.add(new CalculateParameters(dataset));
|
||||||
|
modifiers.add(new ContextNodeFields(dataset));
|
||||||
|
|
||||||
|
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument(
|
||||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||||
new IndividualProhibitedFromSearch(context) );
|
new IndividualProhibitedFromSearchImpl(context),
|
||||||
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc );
|
modifiers);
|
||||||
List<Obj2DocIface> o2d = new ArrayList<Obj2DocIface>();
|
|
||||||
o2d.add(indToSolrDoc);
|
|
||||||
|
|
||||||
/* setup solr indexer */
|
/* setup solr indexer */
|
||||||
SolrIndexer solrIndexer = new SolrIndexer(server, o2d);
|
SolrIndexer solrIndexer = new SolrIndexer(server, indToSolrDoc);
|
||||||
if( solrIndexer.isIndexEmpty() ){
|
if( solrIndexer.isIndexEmpty() ){
|
||||||
log.info("solr index is empty, requesting rebuild");
|
log.info("solr index is empty, requesting rebuild");
|
||||||
sce.getServletContext().setAttribute(LuceneSetup.INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE);
|
sce.getServletContext().setAttribute(LuceneSetup.INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE);
|
||||||
|
|
|
@ -41,8 +41,8 @@ public class ProhibitedFromSearchTest {
|
||||||
ProhibitedFromSearch pfs = new ProhibitedFromSearch( SEARCH_CONFIG_URI , m);
|
ProhibitedFromSearch pfs = new ProhibitedFromSearch( SEARCH_CONFIG_URI , m);
|
||||||
Assert.assertNotNull(pfs.prohibitedClasses);
|
Assert.assertNotNull(pfs.prohibitedClasses);
|
||||||
Assert.assertTrue(pfs.prohibitedClasses.size() == 4);
|
Assert.assertTrue(pfs.prohibitedClasses.size() == 4);
|
||||||
Assert.assertTrue(pfs.isClassProhibited(TEST_CLASS));
|
Assert.assertTrue(pfs.isClassProhibitedFromSearch(TEST_CLASS));
|
||||||
Assert.assertTrue(!pfs.isClassProhibited("http://someOtherClass.com/test"));
|
Assert.assertTrue(!pfs.isClassProhibitedFromSearch("http://someOtherClass.com/test"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -55,8 +55,8 @@ public class ProhibitedFromSearchTest {
|
||||||
ProhibitedFromSearch pfs = new ProhibitedFromSearch( "http://NotFound.com/inModel", m);
|
ProhibitedFromSearch pfs = new ProhibitedFromSearch( "http://NotFound.com/inModel", m);
|
||||||
Assert.assertNotNull(pfs.prohibitedClasses);
|
Assert.assertNotNull(pfs.prohibitedClasses);
|
||||||
Assert.assertTrue(pfs.prohibitedClasses.size() == 0);
|
Assert.assertTrue(pfs.prohibitedClasses.size() == 0);
|
||||||
Assert.assertTrue(!pfs.isClassProhibited(TEST_CLASS));
|
Assert.assertTrue(!pfs.isClassProhibitedFromSearch(TEST_CLASS));
|
||||||
Assert.assertTrue(!pfs.isClassProhibited("http://someOtherClass.com/test"));
|
Assert.assertTrue(!pfs.isClassProhibitedFromSearch("http://someOtherClass.com/test"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -122,7 +122,7 @@ public class ProhibitedFromSearchTest {
|
||||||
ProhibitedFromSearch pfs = new ProhibitedFromSearch( DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, m);
|
ProhibitedFromSearch pfs = new ProhibitedFromSearch( DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, m);
|
||||||
Assert.assertNotNull(pfs.prohibitedClasses);
|
Assert.assertNotNull(pfs.prohibitedClasses);
|
||||||
Assert.assertEquals(1, pfs.prohibitedClasses.size() );
|
Assert.assertEquals(1, pfs.prohibitedClasses.size() );
|
||||||
Assert.assertTrue(pfs.isClassProhibited("http://vivoweb.org/ontology/core#NonAcademic"));
|
Assert.assertTrue(pfs.isClassProhibitedFromSearch("http://vivoweb.org/ontology/core#NonAcademic"));
|
||||||
Assert.assertTrue(!pfs.isClassProhibited("http://someOtherClass.com/test"));
|
Assert.assertTrue(!pfs.isClassProhibitedFromSearch("http://someOtherClass.com/test"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue