Committing changes with merge from search relevance branch.

This commit is contained in:
anupsawant 2011-06-23 15:21:33 +00:00
commit 692446335e
31 changed files with 48715 additions and 2514 deletions

View file

@ -222,9 +222,9 @@
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true"> <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time -->
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="true"/>
-->
<!-- Case insensitive stop word removal. <!-- Case insensitive stop word removal.
add enablePositionIncrements=true in both the index and query add enablePositionIncrements=true in both the index and query
analyzers to leave a 'gap' for more accurate phrase queries. analyzers to leave a 'gap' for more accurate phrase queries.
@ -237,12 +237,12 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<!-- <filter class="solr.PorterStemFilterFactory"/> --> <filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/> <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" <filter class="solr.StopFilterFactory"
ignoreCase="true" ignoreCase="true"
words="stopwords.txt" words="stopwords.txt"
@ -251,6 +251,7 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/> <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -381,7 +382,7 @@
<fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" > <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/> <filter class="solr.PhoneticFilterFactory" encoder="Metaphone" inject="false"/>
</analyzer> </analyzer>
</fieldtype> </fieldtype>
@ -472,36 +473,50 @@
<!-- **************************** Vitro Fields *************************** --> <!-- **************************** Vitro Fields *************************** -->
<field name="DocId" type="string" indexed="true" stored="true" required="true" /> <field name="DocId" type="string" indexed="true" stored="true" required="true" omitNorms="true"/>
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/> <field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/> <field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/> <field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/> <field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/> <field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" omitNorms="true" multiValued="true"/>
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/> <field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
<field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/> <field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/>
<!-- RY Not sure if we need to store nameLowercase. Is it ever displayed? --> <!-- RY Not sure if we need to store nameLowercase -->
<field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/> <field name="nameLowercase" type="string" indexed="true" stored="true" multiValued="true"/>
<!-- A sortable version of nameLowercase --> <!-- A sortable version of nameLowercase -->
<field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" /> <field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" />
<field name="nameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/> <field name="nameUnstemmed" type="lowercase" indexed="true" stored="false" multiValued="true"/>
<field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/> <field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
<!-- <!--
<field name="prefixNameUnstemmed" type="prefixTextUnstemmed" indexed="true" stored="false" multiValued="true"/> <field name="prefixNameUnstemmed" type="prefixTextUnstemmed" indexed="true" stored="false" multiValued="true"/>
<field name="prefixNameStemmed" type="prefixText" indexed="true" stored="false" multiValued="true"/> <field name="prefixNameStemmed" type="prefixText" indexed="true" stored="false" multiValued="true"/>
--> -->
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/> <field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="NAME_PHONETIC" type ="phonetic" indexed="true" stored="false" multiValued="true"/>
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/> <field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/> <field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
<field name="ALLTEXT_PHONETIC" type="phonetic" indexed="true" stored="false" multiValued="true"/>
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/> <field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
<field name="targetInfo" type="text" indexed="true" stored="false" multiValued="true"/>
<!-- <field name="BETA" type="float" indexed="true" stored="true" multiValued="false"/>
<field name="PHI" type="float" indexed="true" stored="true" multiValued="false"/>
<field name="ADJACENT_NODES" type="string" indexed="true" stored="true" multiValued="true"/> -->
<field name="moniker" type="ignored" /> <field name="moniker" type="ignored" />
<field name="modType" type="ignored"/> <field name="modType" type="ignored"/>
<field name="JCLASS" type="ignored"/> <field name="JCLASS" type="ignored"/>
<!-- Copy nameLowercase to sortable field. --> <!-- Copy nameLowercase to sortable field. -->
<copyField source="nameLowercase" dest="nameLowercaseSingleValued" /> <copyField source="nameLowercase" dest="nameLowercaseSingleValued" />
<!-- **************************** End Vitro Fields *************************** --> <!-- **************************** End Vitro Fields *************************** -->
@ -571,7 +586,7 @@
<uniqueKey>DocId</uniqueKey> <uniqueKey>DocId</uniqueKey>
<!-- field for the QueryParser to use when an explicit fieldname is absent --> <!-- field for the QueryParser to use when an explicit fieldname is absent -->
<defaultSearchField>ALLTEXT</defaultSearchField> <!-- <defaultSearchField>ALLTEXT</defaultSearchField> -->
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" --> <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
<solrQueryParser defaultOperator="AND"/> <solrQueryParser defaultOperator="AND"/>
@ -594,7 +609,8 @@
<!-- Similarity is the scoring routine for each document vs. a query. <!-- Similarity is the scoring routine for each document vs. a query.
A custom similarity may be specified here, but the default is fine A custom similarity may be specified here, but the default is fine
for most applications. --> for most applications. -->
<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> --> <similarity class="org.apache.lucene.search.DefaultSimilarity"/>
<!-- <similarity class="edu.cornell.mannlib.vitro.webapp.search.lucene.CustomSimilarity"/> -->
<!-- ... OR ... <!-- ... OR ...
Specify a SimilarityFactory class name implementation Specify a SimilarityFactory class name implementation
allowing parameters to be used. allowing parameters to be used.

View file

@ -385,6 +385,7 @@
be based on the last SolrCore to be initialized. be based on the last SolrCore to be initialized.
--> -->
-->
<!-- Increasing to handle large wildcard queries used in IndividualListController. <!-- Increasing to handle large wildcard queries used in IndividualListController.
See VIVO-384. --> See VIVO-384. -->
<maxBooleanClauses>50000</maxBooleanClauses> <maxBooleanClauses>50000</maxBooleanClauses>
@ -706,8 +707,15 @@
will be overridden by parameters in the request will be overridden by parameters in the request
--> -->
<lst name="defaults"> <lst name="defaults">
<str name="defType">edismax</str>
<str name="qf">nameRaw nameLowercase ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC nameUnstemmed nameStemmed targetInfo</str>
<str name="pf">targetInfo</str>
<str name="echoParams">explicit</str> <str name="echoParams">explicit</str>
<str name="ps">2</str>
<str name="qs">2</str>
<int name="rows">10</int> <int name="rows">10</int>
<str name="q.alt">*:*</str>
<str name="fl">*,score</str>
</lst> </lst>
<!-- In addition to defaults, "appends" params can be specified <!-- In addition to defaults, "appends" params can be specified
to identify values which should be appended to the list of to identify values which should be appended to the list of
@ -783,10 +791,12 @@
<str name="v.layout">layout</str> <str name="v.layout">layout</str>
<str name="title">Solritas</str> <str name="title">Solritas</str>
<str name="defType">edismax</str> <!-- <str name="defType">edismax</str> -->
<str name="q.alt">*:*</str> <str name="q.alt">*:*</str>
<str name="rows">10</str> <str name="rows">10</str>
<str name="fl">*,score</str> <str name="fl">*,score</str>
<str name="mlt.qf"> <str name="mlt.qf">
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
</str> </str>
@ -1395,7 +1405,6 @@
</fragmentsBuilder> </fragmentsBuilder>
</highlighting> </highlighting>
</searchComponent> </searchComponent>
<!-- Autocomplete --> <!-- Autocomplete -->
<!-- <!--
<searchComponent class="solr.SpellCheckComponent" name="suggest"> <searchComponent class="solr.SpellCheckComponent" name="suggest">
@ -1435,7 +1444,6 @@
</arr> </arr>
</requestHandler> </requestHandler>
--> -->
<!-- Update Processors <!-- Update Processors
Chains of Update Processor Factories for dealing with Update Chains of Update Processor Factories for dealing with Update

View file

@ -56,3 +56,122 @@ was
will will
with with
# these stopwords are taken
# from http://www.onjava.com/pub/a/onjava/2003/01/15/lucene.html?page=2
about
after
all
also
an
and
another
any
are
as
at
be
because
been
before
being
between
both
but
by
came
can
come
could
did
do
does
each
else
for
from
get
got
has
had
he
have
her
here
him
himself
his
how
if
in
into
is
it
its
just
like
make
many
me
might
more
most
much
must
my
never
now
of
on
only
or
other
our
out
over
re
said
same
see
should
since
so
some
still
such
take
than
that
the
their
them
then
there
these
they
this
those
through
to
too
under
up
use
very
want
was
way
we
well
were
what
when
where
which
while
who
will
with
would
you
your

44724
solr/exampleSolr/conf/syn.txt Normal file

File diff suppressed because it is too large Load diff

View file

@ -36,7 +36,14 @@ log4j.rootLogger=INFO, AllAppender
log4j.logger.edu.cornell.mannlib.vitro.webapp.controller.freemarker.BrowseController=WARN log4j.logger.edu.cornell.mannlib.vitro.webapp.controller.freemarker.BrowseController=WARN
log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.pellet.PelletListener=WARN log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.pellet.PelletListener=WARN
log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.RDBGraphGenerator=WARN log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.RDBGraphGenerator=WARN
#log4j.logger.edu.cornell.mannlib.vitro.webapp.search.solr.IndividualToSolrDocument=DEBUG
#log4j.logger.edu.cornell.mannlib.vitro.webapp.search.solr.CalculateParameters=DEBUG
#log4j.logger.edu.cornell.mannlib.vitro.webapp.search.solr.ContextNodeFields=DEBUG
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder=INFO
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder=DEBUG
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.indexing.IndexWorkerThread=INFO
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.solr.SolrIndexer=INFO
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.controller.SolrPagedSearchController=DEBUG
# suppress odd warnings from libraries # suppress odd warnings from libraries
log4j.logger.org.openjena.riot=FATAL log4j.logger.org.openjena.riot=FATAL
log4j.logger.org.directwebremoting=FATAL log4j.logger.org.directwebremoting=FATAL

View file

@ -24,6 +24,9 @@ public class DisplayVocabulary {
/* Individuals */ /* Individuals */
public static final String PRIMARY_LUCENE_INDEX_URI = NS + "PrimaryLuceneIndex"; public static final String PRIMARY_LUCENE_INDEX_URI = NS + "PrimaryLuceneIndex";
//bk392 for extracting properties beyond context nodes.
public static final String CONTEXT_NODES_URI = NS + "QueryForContextNodes";
/* Page types */ /* Page types */
public static final String PAGE_TYPE = NS + "Page"; public static final String PAGE_TYPE = NS + "Page";
public static final String HOME_PAGE_TYPE = NS + "HomePage"; public static final String HOME_PAGE_TYPE = NS + "HomePage";
@ -38,6 +41,8 @@ public class DisplayVocabulary {
public static final DatatypeProperty URL_MAPPING = m_model.createDatatypeProperty(NS + "urlMapping"); public static final DatatypeProperty URL_MAPPING = m_model.createDatatypeProperty(NS + "urlMapping");
public static final String TITLE = NS + "title"; public static final String TITLE = NS + "title";
public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate"); public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate");
//bk392 for extracting properties beyond context nodes.
public static final DatatypeProperty QUERY_FOR_EDUCATIONAL_TRAINING = m_model.createDatatypeProperty(NS + "queryForEducationalTraining");
/* URIs for storing menu.n3 */ /* URIs for storing menu.n3 */
public static final String MENU_TEXT_RES = NS + "MenuText"; public static final String MENU_TEXT_RES = NS + "MenuText";

View file

@ -780,7 +780,7 @@ public class IndividualJena extends IndividualImpl implements Individual {
Statement stmt = stmtIt.nextStatement(); Statement stmt = stmtIt.nextStatement();
if (stmt.getObject().isURIResource()) { if (stmt.getObject().isURIResource()) {
String typeURI = ((Resource)stmt.getObject()).getURI(); String typeURI = ((Resource)stmt.getObject()).getURI();
if (pfs.isClassProhibited(typeURI)) { if (pfs.isClassProhibitedFromSearch(typeURI)) {
return true; return true;
} }
} }

View file

@ -1069,7 +1069,7 @@ public class IndividualSDB extends IndividualImpl implements Individual {
while(itr.hasNext()) { while(itr.hasNext()) {
String typeURI = itr.next().getURI(); String typeURI = itr.next().getURI();
if (pfs.isClassProhibited(typeURI)) { if (pfs.isClassProhibitedFromSearch(typeURI)) {
return true; return true;
} }
} }

View file

@ -324,7 +324,7 @@ public class VClassGroupDaoJena extends JenaBaseDao implements VClassGroupDao {
for (VClassGroup group : groups) { for (VClassGroup group : groups) {
List<VClass> classList = new ArrayList<VClass>(); List<VClass> classList = new ArrayList<VClass>();
for (VClass vclass : group.getVitroClassList()) { for (VClass vclass : group.getVitroClassList()) {
if (!pfs.isClassProhibited(vclass.getURI())) { if (!pfs.isClassProhibitedFromSearch(vclass.getURI())) {
classList.add(vclass); classList.add(vclass);
} }
} }

View file

@ -0,0 +1,66 @@
package edu.cornell.mannlib.vitro.webapp.search;
/**
 * Names of the fields used in search index documents.
 *
 * All names are compile-time constants: every field is {@code final} so that
 * a field name cannot be reassigned at runtime by other code.
 */
public class VitroTermNames {
    /** Id of entity, vclass or tab */
    public static final String URI = "URI";
    /** lucene document id */
    public static final String DOCID = "DocId";
    /** java class of the object that the Doc represents. */
    public static final String JCLASS = "JCLASS";
    /** rdf:type */
    public static final String RDFTYPE = "type";
    /** URI of a class group associated with the document */
    public static final String CLASSGROUP_URI = "classgroup";
    /** Modtime from db */
    public static final String MODTIME = "modTime";

    /** time of index in msec since epoch */
    public static final String INDEXEDTIME = "indexedTime";
    /** timekey of entity in yyyymmddhhmm */
    public static final String TIMEKEY = "TIMEKEY";
    /** time of sunset/end of entity in yyyymmddhhmm */
    public static final String SUNSET = "SUNSET";
    /** time of sunrise/start of entity in yyyymmddhhmm */
    public static final String SUNRISE = "SUNRISE";
    /** entity's moniker */
    public static final String MONIKER = "moniker";
    /** text for 'full text' search, this is stemmed */
    public static final String ALLTEXT = "ALLTEXT";
    /** text for 'full text' search, this is unstemmed for
     * use with wildcards and prefix queries */
    public static final String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
    /** field name for storing targeted information */
    public static final String targetInfo = "targetInfo";
    /** keywords */
    public static final String KEYWORDS = "KEYWORDS";
    /** Does the individual have a thumbnail image? 1=yes 0=no */
    public static final String THUMBNAIL = "THUMBNAIL";
    /** Should individual be included in full text search results? 1=yes 0=no */
    public static final String PROHIBITED_FROM_TEXT_RESULTS = "PROHIBITED_FROM_TEXT_RESULTS";
    /** class names in human readable form of an individual (presumably the lowercased variant, going by the name) */
    public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase";
    /** class names in human readable form of an individual */
    public static final String CLASSLOCALNAME = "classLocalName";

    // Fields derived from rdfs:label
    /** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming */
    public static final String NAME_RAW = "nameRaw"; // was NAMERAW
    /** rdfs:label lowercased, no tokenizing, no stop words, no stemming */
    public static final String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE
    /** rdfs:label lowercased, tokenized, stop words, no stemming */
    public static final String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED
    /** rdfs:label lowercased, tokenized, stop words, stemmed */
    public static final String NAME_STEMMED = "nameStemmed"; // was NAME

    /** field for beta values of all documents */
    public static final String BETA = "BETA";
    // NOTE(review): PHI and ADJACENT_NODES are undocumented in the original;
    // presumably they accompany BETA as ranking parameters -- confirm with the indexing code.
    public static final String PHI = "PHI";
    public static final String ADJACENT_NODES = "ADJACENT_NODES";

    /** phonetic counterpart of the full-text field */
    public static final String ALLTEXT_PHONETIC = "ALLTEXT_PHONETIC";
    /** phonetic counterpart of the name fields */
    public static final String NAME_PHONETIC = "NAME_PHONETIC";
}

View file

@ -0,0 +1,5 @@
package edu.cornell.mannlib.vitro.webapp.search.beans;
/**
 * Answers whether a class, identified by its URI, is prohibited from search.
 */
public interface ClassProhibitedFromSearch {

    /**
     * Reports whether the class with the given URI is prohibited from search.
     *
     * @param classUri URI of the class to check
     * @return {@code true} if the class is prohibited from search
     */
    boolean isClassProhibitedFromSearch(String classUri);
}

View file

@ -1,67 +1,5 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.beans; package edu.cornell.mannlib.vitro.webapp.search.beans;
import javax.servlet.ServletContext; public interface IndividualProhibitedFromSearch {
public boolean isIndividualProhibited(String uri);
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.shared.Lock;
import com.hp.hpl.jena.vocabulary.OWL;
import com.hp.hpl.jena.vocabulary.RDF;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
public class IndividualProhibitedFromSearch {
protected OntModel fullModel;
protected static Log log = LogFactory.getLog(IndividualProhibitedFromSearch.class);
public IndividualProhibitedFromSearch( ServletContext context ){
this.fullModel = ModelContext.getUnionOntModelSelector(context).getFullModel();
}
public boolean isIndividualProhibited(String uri){
if( uri == null || uri.isEmpty() )
return true;
boolean prohibited = false;
try {
fullModel.getLock().enterCriticalSection(Lock.READ);
Query query = makeAskQueryForUri( uri );
prohibited = QueryExecutionFactory.create( query, fullModel).execAsk();
} finally {
fullModel.getLock().leaveCriticalSection();
}
if( prohibited )
log.debug("prohibited " + uri);
return prohibited;
}
private Query makeAskQueryForUri( String uri ){
String queryString =
"PREFIX fn: <http://www.w3.org/2005/xpath-functions#> \n" +
"ASK { \n" +
" <"+uri+"> <" + RDF.type.getURI() + "> ?type . \n" +
" FILTER ( \n" +
" ( fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "\" ) \n" +
" && \n"+
" ! fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "Flag\" ) ) || \n" +
" fn:starts-with( str(?type), \"" + VitroVocabulary.PUBLIC + "\" ) || \n" +
" str(?type) = \"" + OWL.ObjectProperty.getURI() + "\" || \n" +
" str(?type) = \"" + OWL.DatatypeProperty.getURI() + "\" || \n" +
" str(?type) = \"" + OWL.AnnotationProperty.getURI() + "\" \n" +
" )\n" +
"}" ;
return QueryFactory.create( queryString );
}
} }

View file

@ -0,0 +1,70 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.beans;
import javax.servlet.ServletContext;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.shared.Lock;
import com.hp.hpl.jena.vocabulary.OWL;
import com.hp.hpl.jena.vocabulary.RDF;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
public class IndividualProhibitedFromSearchImpl implements IndividualProhibitedFromSearch {
protected OntModel fullModel;
protected static Log log = LogFactory.getLog(IndividualProhibitedFromSearchImpl.class);
public IndividualProhibitedFromSearchImpl( ServletContext context ){
this.fullModel = ModelContext.getUnionOntModelSelector(context).getFullModel();
}
public IndividualProhibitedFromSearchImpl( OntModel fullModel ){
this.fullModel = fullModel;
}
public boolean isIndividualProhibited(String uri){
if( uri == null || uri.isEmpty() )
return true;
boolean prohibited = false;
try {
fullModel.getLock().enterCriticalSection(Lock.READ);
Query query = makeAskQueryForUri( uri );
prohibited = QueryExecutionFactory.create( query, fullModel).execAsk();
} finally {
fullModel.getLock().leaveCriticalSection();
}
if( prohibited )
log.debug("prohibited " + uri);
return prohibited;
}
private Query makeAskQueryForUri( String uri ){
String queryString =
"PREFIX fn: <http://www.w3.org/2005/xpath-functions#> \n" +
"ASK { \n" +
" <"+uri+"> <" + RDF.type.getURI() + "> ?type . \n" +
" FILTER ( \n" +
" ( fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "\" ) \n" +
" && \n"+
" ! fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "Flag\" ) ) || \n" +
" fn:starts-with( str(?type), \"" + VitroVocabulary.PUBLIC + "\" ) || \n" +
" str(?type) = \"" + OWL.ObjectProperty.getURI() + "\" || \n" +
" str(?type) = \"" + OWL.DatatypeProperty.getURI() + "\" || \n" +
" str(?type) = \"" + OWL.AnnotationProperty.getURI() + "\" \n" +
" )\n" +
"}" ;
return QueryFactory.create( queryString );
}
}

View file

@ -25,7 +25,7 @@ import com.hp.hpl.jena.shared.Lock;
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary; import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
public class ProhibitedFromSearch { public class ProhibitedFromSearch implements ClassProhibitedFromSearch{
List<String> prohibitedClasses; List<String> prohibitedClasses;
String ProhibitedFromSearchURI; String ProhibitedFromSearchURI;
@ -41,7 +41,7 @@ public class ProhibitedFromSearch {
model.register(new ProhibitedFromSearchChangeListener( this )); model.register(new ProhibitedFromSearchChangeListener( this ));
} }
public synchronized boolean isClassProhibited(String classURI){ public synchronized boolean isClassProhibitedFromSearch(String classURI){
if( classURI != null ){ if( classURI != null ){
boolean p = prohibitedClasses.contains(classURI); boolean p = prohibitedClasses.contains(classURI);
log.debug( classURI + " is " + (p?"prohibited":"not prohibited")); log.debug( classURI + " is " + (p?"prohibited":"not prohibited"));

View file

@ -49,6 +49,7 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.VitroHighlighter;
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery; import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery;
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory; import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames; import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup; import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup; import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel; import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel;
@ -178,6 +179,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
SolrServer solr = SolrSetup.getSolrServer(getServletContext()); SolrServer solr = SolrSetup.getSolrServer(getServletContext());
QueryResponse response = null; QueryResponse response = null;
try { try {
response = solr.query(query); response = solr.query(query);
@ -349,7 +351,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
for(int i=0; i<hitCount && n > grpsFound ;i++){ for(int i=0; i<hitCount && n > grpsFound ;i++){
try{ try{
SolrDocument doc = docs.get(i); SolrDocument doc = docs.get(i);
Collection<Object> grps = doc.getFieldValues(VitroLuceneTermNames.CLASSGROUP_URI); Collection<Object> grps = doc.getFieldValues(VitroTermNames.CLASSGROUP_URI);
if (grps != null) { if (grps != null) {
for (Object o : grps) { for (Object o : grps) {
String groupUri = o.toString(); String groupUri = o.toString();
@ -364,6 +366,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
} catch(Exception e) { } catch(Exception e) {
log.error("problem getting VClassGroups from search hits " log.error("problem getting VClassGroups from search hits "
+ e.getMessage() ); + e.getMessage() );
e.printStackTrace();
} }
} }

View file

@ -9,8 +9,11 @@ import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Stack;
import java.util.Queue;
import javax.servlet.ServletContext; import javax.servlet.ServletContext;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -21,6 +24,8 @@ import edu.cornell.mannlib.vitro.webapp.dao.VClassDao;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory; import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface; import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
import edu.cornell.mannlib.vitro.webapp.search.solr.CalculateParameters;
/** /**
* The IndexBuilder is used to rebuild or update a search index. * The IndexBuilder is used to rebuild or update a search index.
@ -282,8 +287,29 @@ public class IndexBuilder extends Thread {
* @throws AbortIndexing * @throws AbortIndexing
*/ */
private void indexForSource(Iterator<Individual> individuals , boolean newDocs) throws AbortIndexing{ private void indexForSource(Iterator<Individual> individuals , boolean newDocs) throws AbortIndexing{
long starttime = System.currentTimeMillis();
long count = 0;
// long starttime = System.currentTimeMillis();
int count = 0;
int numOfThreads = 10;
List<IndexWorkerThread> workers = new ArrayList<IndexWorkerThread>();
boolean distributing = true;
for(int i = 0; i< numOfThreads ;i++){
workers.add(new IndexWorkerThread(indexer,i,distributing)); // made a pool of workers
}
log.info("Indexing worker pool ready for indexing.");
// starting worker threads
for(int i =0; i < numOfThreads; i++){
workers.get(i).start();
}
while(individuals.hasNext()){ while(individuals.hasNext()){
if( stopRequested ) if( stopRequested )
throw new AbortIndexing(); throw new AbortIndexing();
@ -291,7 +317,11 @@ public class IndexBuilder extends Thread {
Individual ind = null; Individual ind = null;
try{ try{
ind = individuals.next(); ind = individuals.next();
indexer.index(ind, newDocs);
//indexer.index(ind);
workers.get(count%numOfThreads).addToQueue(ind); // adding individual to worker queue.
}catch(Throwable ex){ }catch(Throwable ex){
if( stopRequested || log == null){//log might be null if system is shutting down. if( stopRequested || log == null){//log might be null if system is shutting down.
throw new AbortIndexing(); throw new AbortIndexing();
@ -300,21 +330,34 @@ public class IndexBuilder extends Thread {
log.warn("Error indexing individual " + uri + " " + ex.getMessage()); log.warn("Error indexing individual " + uri + " " + ex.getMessage());
} }
count++; count++;
if( log.isDebugEnabled() ){ /* if( log.isDebugEnabled() ){
if( (count % 100 ) == 0 && count > 0 ){ if( (count % 100 ) == 0 && count > 0 ){
long dt = (System.currentTimeMillis() - starttime); long dt = (System.currentTimeMillis() - starttime);
log.debug("individuals indexed: " + count + " in " + dt + " msec " + log.debug("individuals indexed: " + count + " in " + dt + " msec " +
" time pre individual = " + (dt / count) + " msec" ); " time pre individual = " + (dt / count) + " msec" );
} }
} */
}
for(int i =0 ; i < numOfThreads; i ++){
workers.get(i).setDistributing(false);
}
for(int i =0; i < numOfThreads; i++){
try{
workers.get(i).join();
}catch(InterruptedException e){
log.error(e,e);
} }
} }
log.info( /* log.info(
"individuals indexed: " + count + " in " + (System.currentTimeMillis() - starttime) + " msec" + "individuals indexed: " + count + " in " + (System.currentTimeMillis() - starttime) + " msec" +
(count!=0?(" time per individual = " + (System.currentTimeMillis() - starttime)/ count + " msec"):"") (count!=0?(" time per individual = " + (System.currentTimeMillis() - starttime)/ count + " msec"):"")
); );*/
} }
/** /**
* For a list of individuals, this builds a list of dependent resources and returns it. * For a list of individuals, this builds a list of dependent resources and returns it.
*/ */
@ -388,4 +431,6 @@ public class IndexBuilder extends Thread {
private class AbortIndexing extends Exception { private class AbortIndexing extends Exception {
// Just a vanilla exception // Just a vanilla exception
} }
} }

View file

@ -0,0 +1,97 @@
package edu.cornell.mannlib.vitro.webapp.search.indexing;
import java.util.LinkedList;
import java.util.Queue;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.solr.IndividualToSolrDocument;
/**
 * Worker thread that indexes the individuals handed to it through a private queue.
 *
 * <p>A producer calls {@link #addToQueue(Individual)} for each individual and, once it has
 * nothing more to hand out, calls {@link #setDistributing(boolean) setDistributing(false)}.
 * The worker keeps running until distribution has ended <em>and</em> its queue is empty, so
 * nothing that was queued before the end-of-distribution signal is dropped.
 *
 * <p>Thread-safety: the queue and the {@code distributing} flag are guarded by the queue's
 * monitor. The progress counters are static (shared by every worker) and guarded by a
 * class-wide lock. Indexing itself runs outside the queue monitor so producers are never
 * blocked while a document is being indexed.
 */
class IndexWorkerThread extends Thread{

    protected IndividualToSolrDocument individualToSolrDoc;
    private IndexerIface indexer = null;
    private Log log = LogFactory.getLog(IndexWorkerThread.class);

    /** Guards {@link #count} and {@link #starttime}, which are shared by all workers. */
    private static final Object STATS_LOCK = new Object();
    private static long count=0;
    private static long starttime = 0;

    private final Queue<Individual> indQueue = new LinkedList<Individual>();
    private int threadNum;
    private volatile boolean distributing;

    /**
     * @param indexer      indexer that receives every queued individual
     * @param threadNum    number used only to identify this worker in log messages
     * @param distributing initial state of the "more work may still arrive" flag
     */
    public IndexWorkerThread(IndexerIface indexer, int threadNum,boolean distributing){
        this.indexer = indexer;
        this.threadNum = threadNum;
        this.distributing = distributing;
        // starttime is static, so it must be guarded by a lock common to all instances.
        synchronized(STATS_LOCK){
            if(starttime == 0)
                starttime = System.currentTimeMillis();
        }
    }

    /**
     * Queues an individual for indexing and wakes the worker if it is waiting.
     *
     * @param ind individual to index
     */
    public void addToQueue(Individual ind){
        synchronized(indQueue){
            indQueue.offer(ind);
            indQueue.notify();
        }
    }

    /**
     * @return true if no individuals are currently waiting in this worker's queue
     */
    public boolean isQueueEmpty(){
        synchronized(indQueue){
            return indQueue.isEmpty();
        }
    }

    /**
     * Tells the worker whether more work may still be handed to it. Passing {@code false}
     * lets the worker exit once its queue has been drained.
     *
     * @param distributing false when the producer has finished handing out work
     */
    public void setDistributing(boolean distributing){
        synchronized(indQueue){
            this.distributing = distributing;
            // Wake the worker right away instead of letting it sit out its wait timeout.
            indQueue.notifyAll();
        }
    }

    /**
     * Indexes queued individuals until distribution has ended and the queue is empty.
     * Failures while indexing are logged and the worker carries on with the remaining work.
     */
    public void run(){
        boolean interrupted = false;
        while(true){
            synchronized(indQueue){
                while(indQueue.isEmpty() && this.distributing){
                    try{
                        log.debug("Worker number " + threadNum + " waiting on some work to be alloted.");
                        indQueue.wait(1000);
                    }catch(InterruptedException ie){
                        log.error(ie,ie);
                        // Keep working, but remember the interrupt so it can be restored on exit.
                        interrupted = true;
                    }
                }
                if(indQueue.isEmpty()){
                    // Only reachable when distribution is over: nothing left and nothing coming.
                    break;
                }
            }
            // Index outside the queue monitor so addToQueue() is not blocked meanwhile.
            try{
                log.debug("work found for Woker number " + threadNum);
                addDocsToIndex();
            }catch(Throwable e){
                log.error(e,e);
            }
        }
        log.info("Worker number " + threadNum + " exiting.");
        if(interrupted){
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Indexes individuals from the queue until it is empty.
     *
     * @throws IndexingException if the indexer rejects an individual; that individual has
     *         already been removed from the queue, the remaining ones stay queued
     */
    protected void addDocsToIndex() throws IndexingException{
        Individual next;
        while((next = pollQueue()) != null){
            indexer.index(next);
            recordIndexed();
        }
    }

    /** Removes and returns the head of the queue, or null if the queue is empty. */
    private Individual pollQueue(){
        synchronized(indQueue){
            return indQueue.poll();
        }
    }

    /** Bumps the shared counter and logs throughput after every 100 indexed individuals. */
    private void recordIndexed(){
        synchronized(STATS_LOCK){
            count++;
            if( log.isInfoEnabled() && (count % 100 ) == 0 ){
                long dt = (System.currentTimeMillis() - starttime);
                log.info("individuals indexed: " + count + " in " + dt + " msec " +
                        " time per individual = " + (dt / count) + " msec" );
            }
        }
    }
}

View file

@ -38,7 +38,7 @@ public interface IndexerIface {
* @param newDoc - if true, just insert doc, if false attempt to update. * @param newDoc - if true, just insert doc, if false attempt to update.
* @throws IndexingException * @throws IndexingException
*/ */
public void index(Individual ind, boolean newDoc)throws IndexingException; public void index(Individual ind)throws IndexingException;
/** /**

View file

@ -2,6 +2,8 @@
package edu.cornell.mannlib.vitro.webapp.search.lucene; package edu.cornell.mannlib.vitro.webapp.search.lucene;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@ -21,6 +23,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
import edu.cornell.mannlib.vitro.webapp.beans.VClass; import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException; import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
@ -31,6 +34,7 @@ import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
* be as full as possible. * be as full as possible.
*/ */
public class Entity2LuceneDoc implements Obj2DocIface{ public class Entity2LuceneDoc implements Obj2DocIface{
/** These are the terms for the lucene index */ /** These are the terms for the lucene index */
public static class VitroLuceneTermNames{ public static class VitroLuceneTermNames{
/** Id of entity, vclass or tab */ /** Id of entity, vclass or tab */
@ -61,6 +65,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{
/** text for 'full text' search, this is unstemmed for /** text for 'full text' search, this is unstemmed for
* use with wildcards and prefix queries */ * use with wildcards and prefix queries */
public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED"; public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
/** class name for storing context nodes **/
public static final String CONTEXTNODE = "contextNode";
/** keywords */ /** keywords */
public static final String KEYWORDS = "KEYWORDS"; public static final String KEYWORDS = "KEYWORDS";
/** Does the individual have a thumbnail image? 1=yes 0=no */ /** Does the individual have a thumbnail image? 1=yes 0=no */
@ -108,6 +114,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{
private IndividualProhibitedFromSearch individualProhibited; private IndividualProhibitedFromSearch individualProhibited;
private static HashMap<String, String> IndividualURIToObjectProperties = new HashMap<String, String>();
private static HashSet<String> objectProperties = new HashSet<String>();
public Entity2LuceneDoc( public Entity2LuceneDoc(
ProhibitedFromSearch classesProhibitedFromSearch, ProhibitedFromSearch classesProhibitedFromSearch,
IndividualProhibitedFromSearch individualProhibited){ IndividualProhibitedFromSearch individualProhibited){
@ -128,7 +138,6 @@ public class Entity2LuceneDoc implements Obj2DocIface{
Document doc = new Document(); Document doc = new Document();
String classPublicNames = ""; String classPublicNames = "";
//DocId //DocId
String id = ent.getURI(); String id = ent.getURI();
log.debug("translating " + id); log.debug("translating " + id);
@ -162,7 +171,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
log.debug("not indexing " + id + " because of type " + clz.getURI()); log.debug("not indexing " + id + " because of type " + clz.getURI());
return null; return null;
}else{ }else{
if( !prohibited && classesProhibitedFromSearch.isClassProhibited(clz.getURI()) ) if( !prohibited && classesProhibitedFromSearch.isClassProhibitedFromSearch(clz.getURI()) )
prohibited = true; prohibited = true;
if( clz.getSearchBoost() != null ) if( clz.getSearchBoost() != null )
@ -197,9 +206,11 @@ public class Entity2LuceneDoc implements Obj2DocIface{
doc.add( new Field(term.DOCID, entClassName + id, doc.add( new Field(term.DOCID, entClassName + id,
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
//vitro Id //vitro Id
doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
//java class //java class
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
@ -212,7 +223,6 @@ public class Entity2LuceneDoc implements Obj2DocIface{
log.debug("Using local name for individual with rdfs:label " + ent.getURI()); log.debug("Using local name for individual with rdfs:label " + ent.getURI());
value = ent.getLocalName(); value = ent.getLocalName();
} }
Field nameRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED); Field nameRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
nameRaw.setBoost(NAME_BOOST); nameRaw.setBoost(NAME_BOOST);
doc.add(nameRaw); doc.add(nameRaw);
@ -230,6 +240,20 @@ public class Entity2LuceneDoc implements Obj2DocIface{
nameStemmed.setBoost(NAME_BOOST); nameStemmed.setBoost(NAME_BOOST);
doc.add(nameStemmed); doc.add(nameStemmed);
String contextNodePropertyValues;
// if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
/*contextNodePropertyValues = searchQueryHandler.getPropertiesAssociatedWithEducationalTraining(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRole(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithPosition(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRelationship(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithAwardReceipt(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithInformationResource(ent.getURI()); */
// }
/* Field contextNodeInformation = new Field(term.CONTEXTNODE, contextNodePropertyValues, Field.Store.YES, Field.Index.ANALYZED );
doc.add(contextNodeInformation);*/
//Moniker //Moniker
@ -279,6 +303,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ), doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
if( ! prohibited ){ if( ! prohibited ){
//ALLTEXT, all of the 'full text' //ALLTEXT, all of the 'full text'
String t=null; String t=null;
@ -310,6 +335,12 @@ public class Entity2LuceneDoc implements Obj2DocIface{
continue; continue;
try { try {
value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t ); value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t );
if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
//IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) );
objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ));
}
} catch (Exception e) { } catch (Exception e) {
log.debug("could not index name of related object: " + e.getMessage()); log.debug("could not index name of related object: " + e.getMessage());
} }
@ -321,6 +352,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED)); doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
} }
// log.info("\n IndividualURItoObjectProperties " + IndividualURIToObjectProperties.toString() + " \n\n");
log.info(" \n Object Properties " + objectProperties.toString() + "\n\n");
return doc; return doc;
} }

View file

@ -474,4 +474,10 @@ public class LuceneIndexer implements IndexerIface {
} }
} }
} }
/**
 * Single-argument indexing entry point declared by IndexerIface.
 *
 * NOTE(review): the body is empty, so every call returns without indexing anything and
 * without reporting an error. IndexWorkerThread calls index(Individual) on whatever
 * IndexerIface it is given, so individuals routed to this indexer that way would be
 * dropped silently - confirm whether this should delegate to the existing indexing logic.
 *
 * @param ind individual to index (currently ignored)
 * @throws IndexingException declared by the interface; never thrown by this stub
 */
@Override
public void index(Individual ind) throws IndexingException {
// TODO: not implemented yet; calls are currently a silent no-op.
}
} }

View file

@ -6,6 +6,7 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CONTEXTNODE;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_UNSTEMMED; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_UNSTEMMED;
@ -38,7 +39,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters; import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener; import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl;
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface; import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
@ -111,10 +112,15 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
getAnalyzer()); getAnalyzer());
context.setAttribute(ANALYZER, getAnalyzer()); context.setAttribute(ANALYZER, getAnalyzer());
//bk392 adding another argument to Entity2LuceneDoc
// that takes care of sparql queries for context nodes.
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel"); OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
Entity2LuceneDoc translator = new Entity2LuceneDoc( Entity2LuceneDoc translator = new Entity2LuceneDoc(
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
new IndividualProhibitedFromSearch(context) ); new IndividualProhibitedFromSearchImpl(context)
);
indexer.addObj2Doc(translator); indexer.addObj2Doc(translator);
context.setAttribute(LuceneIndexer.class.getName(), indexer); context.setAttribute(LuceneIndexer.class.getName(), indexer);
@ -250,9 +256,11 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
analyzer.addAnalyzer(NAME_STEMMED, new HtmlLowerStopStemAnalyzer()); analyzer.addAnalyzer(NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29)); analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29)); analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(CONTEXTNODE, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer()); analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer()); analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer());
return analyzer; return analyzer;
} }

View file

@ -27,7 +27,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters; import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener; import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
@ -92,7 +92,8 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel"); OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
Entity2LuceneDoc translator = new Entity2LuceneDoc( Entity2LuceneDoc translator = new Entity2LuceneDoc(
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
new IndividualProhibitedFromSearch(context) ); new IndividualProhibitedFromSearchImpl(context)
);
indexer.addObj2Doc(translator); indexer.addObj2Doc(translator);
indexer.setLuceneIndexFactory(lif); indexer.setLuceneIndexFactory(lif);

View file

@ -0,0 +1,347 @@
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.QuerySolutionMap;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.shared.Lock;
import com.hp.hpl.jena.ontology.OntModel;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
/**
 * DocumentModifier that derives boost values for a Solr document from the link structure
 * around an individual.
 *
 * <p>Two numbers are computed:
 * <ul>
 *   <li><b>beta</b> - {@code 1 + 100 * (distinct subjects linking to the individual) / totalInd},
 *       cached per URI in {@link #betaMap};</li>
 *   <li><b>phi</b> - {@code (0.1 + sum of the betas of the listed URIs) / (number of URIs)},
 *       or 1 when no URIs are listed.</li>
 * </ul>
 * {@link #modifyDocument} combines them into field boosts and the document boost.
 *
 * <p>An instance created with the no-argument constructor has no dataset; its query methods
 * cannot be used.
 */
public class CalculateParameters implements DocumentModifier {

    private Dataset dataset;

    /**
     * Total number of individuals, used as the divisor for beta. Starts at 1 and is replaced
     * by the background count started from the constructor; volatile because that write
     * happens on another thread.
     */
    public static volatile int totalInd=1;

    /** Cache of beta values keyed by individual URI. */
    protected Map<String,Float> betaMap = new Hashtable<String,Float>();

    private static final String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
        + " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
        + " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
        + " prefix core: <http://vivoweb.org/ontology/core#> "
        + " prefix foaf: <http://xmlns.com/foaf/0.1/> "
        + " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
        + " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
        + " prefix bibo: <http://purl.org/ontology/bibo/> ";

    /** Counts the distinct subjects that point at ?uri (bound at execution time). */
    private static final String betaQuery = prefix + " SELECT count(distinct ?inLinks) " +
        " WHERE { " +
        " ?uri rdf:type owl:Thing . " +
        " ?inLinks ?prop ?uri . " +
        " } ";

    /** Counts all individuals typed owl:Thing. */
    private static final String totalCountQuery = prefix + " SELECT count(distinct ?ind) " +
        " WHERE { " +
        " ?ind rdf:type owl:Thing . " +
        " } ";

    /** People reached from ?uri through Relationship / InformationResource / Relationship. */
    private static final String coauthorQuery = prefix +
        " SELECT ?adjobj (str(?adjobjLabel) as ?coauthor) " +
        " WHERE { " +
        " ?uri rdf:type <http://xmlns.com/foaf/0.1/Person> . " +
        " ?uri ?prop ?obj . " +
        " ?obj rdf:type <http://vivoweb.org/ontology/core#Relationship> . " +
        " ?obj ?prop2 ?obj2 . " +
        " ?obj2 rdf:type <http://vivoweb.org/ontology/core#InformationResource> . " +
        " ?obj2 ?prop3 ?obj3 . " +
        " ?obj3 rdf:type <http://vivoweb.org/ontology/core#Relationship> . " +
        " ?obj3 ?prop4 ?adjobj . " +
        " ?adjobj rdfs:label ?adjobjLabel . " +
        " ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Person> . " +
        " FILTER (?prop !=rdf:type) . " +
        " FILTER (?prop2!=rdf:type) . " +
        " FILTER (?prop3!=rdf:type) . " +
        " FILTER (?prop4!=rdf:type) . " +
        " FILTER (?adjobj != ?uri) . " +
        "}";

    /** Organizations, people, information resources and locations two hops from an Agent ?uri. */
    private static final String twoHopQuery = prefix +
        " SELECT ?adjobj " +
        " WHERE{ " +
        " ?uri rdf:type foaf:Agent . " +
        " ?uri ?prop ?obj . " +
        " ?obj ?prop2 ?adjobj . " +
        " FILTER (?prop !=rdf:type) . " +
        " FILTER isURI(?obj) . " +
        " FILTER (?prop2!=rdf:type) . " +
        " FILTER (?adjobj != ?uri) . " +
        " FILTER isURI(?adjobj) . " +
        " { ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Organization> . } " +
        " UNION " +
        " { ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Person> . } " +
        " UNION " +
        " { ?adjobj rdf:type <http://vivoweb.org/ontology/core#InformationResource> . } " +
        " UNION " +
        " { ?adjobj rdf:type <http://vivoweb.org/ontology/core#Location> . } ." +
        "}";

    /** Queries run by {@link #getAdjacentNodes(String)}, in execution order. */
    private static final String[] adjacentNodeQueries = { coauthorQuery, twoHopQuery };

    private static Log log = LogFactory.getLog(CalculateParameters.class);

    /** Fields whose boost is set to beta + phi + NAME_BOOST. */
    private static final String[] fieldsToAddBetaTo = {
        VitroTermNames.NAME_RAW,
        VitroTermNames.NAME_LOWERCASE,
        VitroTermNames.NAME_UNSTEMMED,
        VitroTermNames.NAME_STEMMED
    };

    /** Fields that receive the adjacent-node text boosted by beta * phi * ALL_TEXT_BOOST. */
    private static final String[] fieldsToMultiplyBetaBy = {
        VitroTermNames.ALLTEXT,
        VitroTermNames.ALLTEXTUNSTEMMED,
    };

    /**
     * Creates a modifier backed by the given dataset and starts a background thread that
     * counts all individuals and stores the result in {@link #totalInd}.
     *
     * @param dataset dataset that every query is executed against
     */
    public CalculateParameters(Dataset dataset){
        this.dataset =dataset;
        new Thread(new TotalInd(this.dataset,totalCountQuery)).start();
    }

    /** Creates an instance without a dataset. */
    public CalculateParameters(){
        super();
    }

    /**
     * Computes beta for one individual by counting the distinct subjects that link to it.
     * If the query fails the error is logged and the count is treated as zero, which yields
     * a beta of 1.
     *
     * @param uri URI of the individual
     * @return {@code 1 + 100 * inLinks / totalInd}
     */
    public float calculateBeta(String uri){
        int inLinkCount = 0;
        QuerySolutionMap initialBinding = new QuerySolutionMap();
        Resource uriResource = ResourceFactory.createResource(uri);
        initialBinding.add("uri", uriResource);

        dataset.getLock().enterCriticalSection(Lock.READ);
        try{
            Query query = QueryFactory.create(betaQuery,Syntax.syntaxARQ);
            QueryExecution qexec = QueryExecutionFactory.create(query,dataset,initialBinding);
            try{
                ResultSet results = qexec.execSelect();
                List<String> resultVars = results.getResultVars();
                if(resultVars!=null && resultVars.size()!=0 && results.hasNext()){
                    QuerySolution soln = results.next();
                    inLinkCount = Integer.parseInt(soln.getLiteral(resultVars.get(0)).getLexicalForm());
                }
            }finally{
                // Release the execution even when reading the result fails.
                qexec.close();
            }
        }catch(Throwable t){
            log.error(t,t);
        }finally{
            dataset.getLock().leaveCriticalSection();
        }

        // Read the shared total once; fall back to the initial value of 1 so that a zero
        // (or otherwise invalid) total cannot turn beta into Infinity or NaN.
        int total = totalInd;
        if(total <= 0){
            total = 1;
        }

        float beta = (float)inLinkCount/total;
        beta *= 100;
        beta += 1;
        return beta;
    }

    /**
     * Computes phi from a space-separated list of URIs.
     *
     * @param adjNodes space-separated URIs of the nodes to average over
     * @return {@code (0.1 + sum of beta(uri)) / count}, or 1 if the list holds no URIs
     */
    public float calculatePhi(StringBuffer adjNodes){
        StringTokenizer nodes = new StringTokenizer(adjNodes.toString()," ");
        int size=0;
        float phi = 0.1F;
        while(nodes.hasMoreTokens()){
            size++;
            phi += getBeta(nodes.nextToken());
        }
        if(size>0)
            phi = (float)phi/size;
        else
            phi = 1;
        return phi;
    }

    /**
     * Returns the cached beta for a URI, computing and caching it on first use.
     *
     * @param uri URI of the individual
     * @return beta for that individual
     */
    public synchronized Float getBeta(String uri){
        Float cached = betaMap.get(uri);
        if(cached != null){
            return cached;
        }
        float beta = calculateBeta(uri);
        betaMap.put(uri, beta);
        return beta;
    }

    /**
     * Runs the adjacent-node queries for an individual.
     *
     * @param uri URI of the individual
     * @return a two-element array: index 0 holds the distinct adjacent URIs, each followed
     *         by a space; index 1 holds the distinct co-author labels, each wrapped in
     *         " co-authors " markers. Both are empty strings when nothing is found or the
     *         lookup fails.
     */
    public String[] getAdjacentNodes(String uri){
        Set<String> adjacentNodes = new HashSet<String>();
        Set<String> coauthorNames = new HashSet<String>();
        String[] info = new String[]{"",""};

        QuerySolutionMap initialBinding = new QuerySolutionMap();
        Resource uriResource = ResourceFactory.createResource(uri);
        initialBinding.add("uri", uriResource);

        dataset.getLock().enterCriticalSection(Lock.READ);
        try{
            for(String queryString : adjacentNodeQueries){
                Query query = QueryFactory.create(queryString,Syntax.syntaxARQ);
                QueryExecution qexec = QueryExecutionFactory.create(query,dataset,initialBinding);
                try{
                    ResultSet results = qexec.execSelect();
                    while(results.hasNext()){
                        QuerySolution soln = results.nextSolution();

                        RDFNode adjacent = soln.get("adjobj");
                        if(adjacent instanceof Resource){
                            // Blank nodes have no URI; skip them instead of emitting "null".
                            String adjacentUri = ((Resource)adjacent).getURI();
                            if(adjacentUri != null){
                                adjacentNodes.add(adjacentUri);
                            }
                        }

                        RDFNode coauthor = soln.get("coauthor");
                        if(coauthor!=null){
                            coauthorNames.add(" co-authors " + coauthor.toString() + " co-authors ");
                        }
                    }
                }catch(Exception e){
                    // Keep whatever was collected and move on to the next query.
                    log.error("Error running adjacent-node query in CalculateParameters.getAdjacentNodes for " + uri, e);
                }finally{
                    qexec.close();
                }
            }

            StringBuffer adjacentNodesConcat = new StringBuffer();
            for(String adjacentUri : adjacentNodes){
                adjacentNodesConcat.append(adjacentUri).append(" ");
            }
            info[0] = adjacentNodesConcat.toString();

            StringBuffer coauthorBuff = new StringBuffer();
            for(String coauthorName : coauthorNames){
                coauthorBuff.append(coauthorName);
            }
            info[1] = coauthorBuff.toString();
        }
        catch(Throwable t){
            log.error(t,t);
        }finally{
            dataset.getLock().leaveCriticalSection();
        }
        return info;
    }

    /**
     * Applies the beta/phi based boosts to a document. Fields that are absent from the
     * document are skipped.
     *
     * @param individual individual the document was built from
     * @param doc        document to modify in place
     * @param addUri     additional space-separated URIs to include when computing phi;
     *                   may be null
     */
    @Override
    public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri) {
        log.debug("Parameter calculation starts..");

        String uri = individual.getURI();
        String adjInfo[] = getAdjacentNodes(uri);
        StringBuffer info = new StringBuffer();
        info.append(adjInfo[0]);
        if(addUri != null){
            info.append(addUri.toString());
        }

        float phi = calculatePhi(info);
        float beta = getBeta(uri);

        for(String term: fieldsToAddBetaTo){
            SolrInputField f = doc.getField( term );
            if(f == null){
                log.debug("Field " + term + " not present in document for " + uri);
                continue;
            }
            f.setBoost( beta + phi + IndividualToSolrDocument.NAME_BOOST);
        }

        for(String term: fieldsToMultiplyBetaBy){
            SolrInputField f = doc.getField( term );
            if(f == null){
                log.debug("Field " + term + " not present in document for " + uri);
                continue;
            }
            f.addValue(info.toString(),beta*phi*IndividualToSolrDocument.ALL_TEXT_BOOST);
        }

        SolrInputField targetField = doc.getField(VitroTermNames.targetInfo);
        if(targetField != null){
            targetField.addValue(adjInfo[1],targetField.getBoost());
        }else{
            log.debug("Field " + VitroTermNames.targetInfo + " not present in document for " + uri);
        }

        doc.setDocumentBoost(beta*phi*IndividualToSolrDocument.ALL_TEXT_BOOST);

        log.debug("Parameter calculation is done");
    }

    /** Discards every cached beta value. */
    public void clearMap(){
        betaMap.clear();
    }
}
/**
 * Background task that runs a counting query against the dataset and publishes the result
 * in {@code CalculateParameters.totalInd}.
 */
class TotalInd implements Runnable{
    private Dataset dataset;
    private String totalCountQuery;
    private static Log log = LogFactory.getLog(TotalInd.class);

    /**
     * @param dataset         dataset the count is run against
     * @param totalCountQuery query whose first result variable holds the count
     */
    public TotalInd(Dataset dataset,String totalCountQuery){
        this.dataset = dataset;
        this.totalCountQuery = totalCountQuery;
    }

    /**
     * Executes the count query under a read lock. A positive count replaces
     * {@code CalculateParameters.totalInd}; a missing or non-positive count leaves the
     * current value untouched, because that value is used as a divisor. Any failure is
     * logged.
     */
    public void run(){
        dataset.getLock().enterCriticalSection(Lock.READ);
        try{
            Query query = QueryFactory.create(totalCountQuery,Syntax.syntaxARQ);
            QueryExecution qexec = QueryExecutionFactory.create(query,dataset);
            try{
                int counted = 0;
                ResultSet results = qexec.execSelect();
                List<String> resultVars = results.getResultVars();
                if(resultVars!=null && resultVars.size()!=0 && results.hasNext()){
                    QuerySolution soln = results.next();
                    counted = Integer.parseInt(soln.getLiteral(resultVars.get(0)).getLexicalForm());
                }
                if(counted > 0){
                    CalculateParameters.totalInd = counted;
                }else{
                    log.warn("Individual count query returned " + counted + "; keeping previous total.");
                }
                log.info("Total number of individuals in the system are : " + CalculateParameters.totalInd);
            }finally{
                // Release the execution even when reading the result fails.
                qexec.close();
            }
        }catch(Throwable t){
            log.error(t,t);
        }finally{
            dataset.getLock().leaveCriticalSection();
        }
    }
}

View file

@ -0,0 +1,452 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.QuerySolutionMap;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.shared.Lock;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
public class ContextNodeFields implements DocumentModifier{
private static final String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
+ " prefix core: <http://vivoweb.org/ontology/core#> "
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
private static final List<String> singleValuedQueriesForAgent = new ArrayList<String>();
private static final List<String> singleValuedQueriesForInformationResource = new ArrayList<String>();
private static final List<String> multiValuedQueriesForAgent = new ArrayList<String>();
private static final String multiValuedQueryForInformationResource;
// private static StringBuffer objectProperties = new StringBuffer();
private Log log = LogFactory.getLog(ContextNodeFields.class);
private Dataset dataset;
/**
 * Creates a modifier that runs its queries against the given dataset.
 *
 * @param dataset dataset stored for later query execution; runQuery locks and queries it
 */
public ContextNodeFields(Dataset dataset){
this.dataset = dataset;
}
/* TODO: consider a constructor like this:
* public ContextNodeFields(OntModel fullModel, List<String> queries )
*/
/**
 * Runs one SPARQL SELECT query for an individual and flattens the result
 * into a single text buffer.
 *
 * The query variable {@code ?uri} is pre-bound to the individual's URI. Every
 * non-null value of every variable in every solution is appended to the
 * buffer, each preceded by a single space. The query is executed while
 * holding the dataset's read lock.
 *
 * Failures raised while executing the query or reading results are logged
 * and swallowed, so the buffer may hold a partial (or empty) result. Errors
 * raised while parsing {@code query} happen before the lock is taken and
 * propagate to the caller.
 *
 * @param individual the individual whose URI is bound to {@code ?uri}
 * @param query      the query text, parsed with ARQ syntax
 * @return the collected values; never null
 */
public StringBuffer runQuery( Individual individual, String query ){
    final StringBuffer collected = new StringBuffer();

    // Bind ?uri up front so the same query text can be reused for any individual.
    final QuerySolutionMap bindings = new QuerySolutionMap();
    bindings.add("uri", ResourceFactory.createResource(individual.getURI()));
    final Query parsed = QueryFactory.create(query, Syntax.syntaxARQ);

    dataset.getLock().enterCriticalSection(Lock.READ);
    try {
        final QueryExecution execution = QueryExecutionFactory.create(parsed, dataset, bindings);
        try {
            for (ResultSet rows = execution.execSelect(); rows.hasNext(); ) {
                final QuerySolution row = rows.nextSolution();
                for (Iterator<String> names = row.varNames(); names.hasNext(); ) {
                    final String varName = names.next();
                    final RDFNode value = row.get(varName);
                    if (value == null) {
                        log.debug(varName + " is null");
                        continue;
                    }
                    collected.append(" " + value.toString());
                }
            }
        } catch (Throwable t) {
            // Best effort: a failing query must not abort indexing of the document.
            log.error(t, t);
        } finally {
            execution.close();
        }
    } finally {
        dataset.getLock().leaveCriticalSection();
    }
    return collected;
}
/**
 * Adds context-node text for the individual to an existing Solr document.
 *
 * Every query in {@code multiValuedQueriesForAgent} is run on its own
 * {@code QueryRunner} thread; the combined results are added as one value to
 * the {@code VitroTermNames.ALLTEXT} field. The information-resource query is
 * then run on the calling thread and its result is added to the
 * {@code VitroTermNames.targetInfo} field. Both values are added with the
 * boost the respective field already has.
 *
 * Both fields must already be present on {@code doc}; they are dereferenced
 * without a null check.
 *
 * If the calling thread is interrupted while waiting for a worker, that
 * worker's result is skipped, the remaining workers are still awaited, and
 * the interrupt status is restored before this method returns.
 *
 * @param individual the individual being indexed
 * @param doc        the document to extend
 * @param addUri     not used by this implementation
 */
@Override
public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri) {
    log.debug("retrieving context node values..");

    SolrInputField field = doc.getField(VitroTermNames.ALLTEXT);
    SolrInputField targetField = doc.getField(VitroTermNames.targetInfo);

    StringBuffer objectProperties = new StringBuffer();
    objectProperties.append(" ");

    int threadCount = multiValuedQueriesForAgent.size();
    QueryRunner[] threads = new QueryRunner[threadCount];

    // Make a thread for each query and start it.
    for (int i = 0; i < threadCount; i++) {
        QueryRunner t = new QueryRunner(individual, multiValuedQueriesForAgent.get(i));
        t.start();
        threads[i] = t;
    }

    // Wait for each thread to finish and collect results.
    boolean interrupted = false;
    for (int i = 0; i < threadCount; i++) {
        try {
            threads[i].join();
            objectProperties.append(threads[i].getPropertyValues());
        } catch (InterruptedException e) {
            // join() cleared the interrupt flag; remember it and restore it once
            // all work is done, so the remaining joins are not cut short.
            interrupted = true;
            log.error("Thread " + threads[i].getName() + " interrupted!", e);
        }
    }

    try {
        targetField.addValue(" " + runQuery(individual, multiValuedQueryForInformationResource), targetField.getBoost());
        // Pass a plain String, not the StringBuffer itself, so the indexed value
        // does not depend on how the Solr client serializes an arbitrary object.
        field.addValue(objectProperties.toString(), field.getBoost());
        log.debug("context node values are retrieved");
    } finally {
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
// Single-valued queries for foaf:Agent.
// Every query starts from ?uri (typed foaf:Agent), follows any predicate ?b to a
// context node ?c of a given type, and returns exactly one value reachable from
// that node as ?contextNodeProperty (converted with str()).
// NOTE(review): nothing else in this class reads singleValuedQueriesForAgent;
// modifyDocument only uses the multi-valued queries. Confirm whether this list is still needed.
static {
// --- context node type core:Position ---
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:hrJobTitle ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:involvedOrganizationName ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:positionForPerson ?f . ?f rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:positionInOrganization ?i . ?i rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:titleOrRole ?ContextNodeProperty . }");
// --- context node type core:Relationship ---
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Relationship . " +
" ?c core:advisee ?d . ?d rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Relationship . " +
" ?c core:degreeCandidacy ?e . ?e rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Relationship . " +
" ?c core:linkedAuthor ?f . ?f rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Relationship . " +
" ?c core:linkedInformationResource ?h . ?h rdfs:label ?ContextNodeProperty . }");
// --- context node type core:AwardReceipt ---
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:AwardReceipt . " +
" ?c core:awardConferredBy ?d . ?d rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:AwardReceipt . " +
" ?c core:awardOrHonorFor ?e . ?e rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:AwardReceipt . " +
" ?c core:description ?ContextNodeProperty . }");
// --- context node type core:Role (the only query here using DISTINCT and ORDER BY) ---
singleValuedQueriesForAgent.add(prefix + "SELECT DISTINCT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:Role ; core:roleIn ?Organization ."
+ " ?Organization rdfs:label ?ContextNodeProperty . "
+ " } ORDER BY ?ContextNodeProperty ");
// --- context node type core:EducationalTraining ---
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:degreeEarned ?d . ?d rdfs:label ?ContextNodeProperty ."
+ " }");
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:degreeEarned ?d . ?d core:abbreviation ?ContextNodeProperty ."
+ " }");
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:majorField ?ContextNodeProperty ."
+ " }");
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:departmentOrSchool ?ContextNodeProperty ."
+ " }");
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:trainingAtOrganization ?e . ?e rdfs:label ?ContextNodeProperty . "
+ " }");
}
// Single-valued queries for core:InformationResource.
// Every query requires ?uri to be typed core:InformationResource and returns one
// related label as ?contextNodeProperty (converted with str()).
// NOTE(review): nothing else in this class reads
// singleValuedQueriesForInformationResource. Confirm whether this list is still needed.
static {
// label of each author linked through an authorship node
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ " ?uri rdf:type core:InformationResource . "
+ "?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ."
+ "?b rdfs:label ?ContextNodeProperty .}");
// label of each resource reached directly via core:linkedInformationResource
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ " ?uri rdf:type core:InformationResource . "
+ " ?uri core:linkedInformationResource ?d ."
+ " ?d rdfs:label ?ContextNodeProperty . }");
// label of each core:features value
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type core:InformationResource . "
+ "?uri core:features ?i . ?i rdfs:label ?ContextNodeProperty ."
+ "}");
// label of each bibo:editor
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type core:InformationResource . "
+ "?uri bibo:editor ?e . ?e rdfs:label ?ContextNodeProperty ."
+ "}");
// label of each subject area
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type core:InformationResource . "
+ "?uri core:hasSubjectArea ?f . ?f rdfs:label ?ContextNodeProperty ."
+ "}");
// label of whatever each subject area is a core:researchAreaOf
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type core:InformationResource . "
+ "?uri core:hasSubjectArea ?f . ?f core:researchAreaOf ?h . ?h rdfs:label ?ContextNodeProperty ."
+ "}");
}
// Multi-valued queries for foaf:Agent.
// One query per context-node type; each returns several properties of the context
// node ?c in a single solution. Except for the core:Role query, every property
// sits in its own OPTIONAL group, so a missing property leaves that variable unbound.
// modifyDocument runs each entry of this list on its own QueryRunner thread and
// adds the combined output to the ALLTEXT field.
static{
// core:Position: job title, organization name, person label, organization label, title/role
multiValuedQueriesForAgent.add(prefix +
"SELECT " +
"(str(?HRJobTitle) as ?hrJobTitle) (str(?InvolvedOrganizationName) as ?involvedOrganizationName) " +
" (str(?PositionForPerson) as ?positionForPerson) (str(?PositionInOrganization) as ?positionInOrganization) " +
" (str(?TitleOrRole) as ?titleOrRole) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:Position . "
+ " OPTIONAL { ?c core:hrJobTitle ?HRJobTitle . } . "
+ " OPTIONAL { ?c core:involvedOrganizationName ?InvolvedOrganizationName . } ."
+ " OPTIONAL { ?c core:positionForPerson ?f . ?f rdfs:label ?PositionForPerson . } . "
+ " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . "
+ " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . "
+ " }");
// core:Relationship: labels of advisee, degree candidacy, linked author, linked information resource
multiValuedQueriesForAgent.add(prefix +
"SELECT (str(?Advisee) as ?advisee) (str(?DegreeCandidacy) as ?degreeCandidacy) " +
" (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:Relationship . "
+ " OPTIONAL { ?c core:advisee ?d . ?d rdfs:label ?Advisee . } . "
+ " OPTIONAL { ?c core:degreeCandidacy ?e . ?e rdfs:label ?DegreeCandidacy . } ."
+ " OPTIONAL { ?c core:linkedAuthor ?f . ?f rdfs:label ?LinkedAuthor . } . "
+ " OPTIONAL { ?c core:linkedInformationResource ?h . ?h rdfs:label ?LinkedInformationResource . } . "
+ " } ");
// core:AwardReceipt: labels of conferring body and honored item, plus the description
multiValuedQueriesForAgent.add(prefix +
"SELECT (str(?AwardConferredBy) as ?awardConferredBy) (str(?AwardOrHonorFor) as ?awardOrHonorFor) " +
" (str(?Description) as ?description) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:AwardReceipt . "
+ " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy } . "
+ " OPTIONAL { ?c core:awardOrHonorFor ?e . ?e rdfs:label ?AwardOrHonorFor } ."
+ " OPTIONAL { ?c core:description ?Description . } . "
+ " }");
// core:Role: label of the organization the role is in (required, not OPTIONAL)
multiValuedQueriesForAgent.add(prefix +
"SELECT (str(?OrganizationLabel) as ?organizationLabel) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:Role ; core:roleIn ?Organization ."
+ " ?Organization rdfs:label ?OrganizationLabel . "
+ " }");
// core:EducationalTraining: degree label and abbreviation (matched together in one
// OPTIONAL, so both are unbound if either is missing), major field,
// department/school, training organization label
multiValuedQueriesForAgent.add(prefix +
"SELECT (str(?AcademicDegreeLabel) as ?academicDegreeLabel) (str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) "
+ "(str(?MajorField) as ?majorField) (str(?DepartmentOrSchool) as ?departmentOrSchool) " +
"(str(?TrainingAtOrganizationLabel) as ?trainingAtOrganizationLabel) WHERE {"
+ " ?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ "OPTIONAL { ?c core:degreeEarned ?d . ?d rdfs:label ?AcademicDegreeLabel ; core:abbreviation ?AcademicDegreeAbbreviation . } . "
+ "OPTIONAL { ?c core:majorField ?MajorField .} ."
+ " OPTIONAL { ?c core:departmentOrSchool ?DepartmentOrSchool . }"
+ " OPTIONAL { ?c core:trainingAtOrganization ?e . ?e rdfs:label ?TrainingAtOrganizationLabel . } . "
+"}");
}
// Multi-valued query for core:InformationResource.
// A single query returning, for a ?uri typed core:InformationResource, the labels of
// its linked authors and linked information resources (both via the authorship
// node, in one OPTIONAL), editors, subject areas with their researchAreaOf labels
// (one OPTIONAL), and features.
// modifyDocument runs this query through runQuery on the calling thread and adds
// the output to the targetInfo field.
static {
multiValuedQueryForInformationResource = prefix +
"SELECT (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) "
+ "(str(?Editor) as ?editor) (str(?SubjectArea) as ?subjectArea) (str(?ResearchAreaOf) as ?researchAreaOf) " +
"(str(?Features) as ?features) WHERE {"
+ " ?uri rdf:type core:InformationResource . "
+ "OPTIONAL { ?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ; core:linkedInformationResource ?d ." +
"?b rdfs:label ?LinkedAuthor . ?d rdfs:label ?LinkedInformationResource } . "
+ "OPTIONAL { ?uri bibo:editor ?e . ?e rdfs:label ?Editor . } ."
+ " OPTIONAL { ?uri core:hasSubjectArea ?f . ?f rdfs:label ?SubjectArea ; core:researchAreaOf ?h . ?h rdfs:label ?ResearchAreaOf . } "
+ " OPTIONAL { ?uri core:features ?i . ?i rdfs:label ?Features . } . "
+"}" ;
}
/**
 * Worker thread that runs one SPARQL query for one individual through the
 * enclosing instance's {@code runQuery} and keeps the resulting text.
 *
 * The result is only complete once the thread has finished, so callers
 * should {@code join()} the thread before calling {@link #getPropertyValues()}.
 */
private class QueryRunner extends Thread {
    private final Individual ind;
    private final String query;
    private final StringBuffer propertyValues = new StringBuffer();

    /**
     * @param ind   the individual the query is run for
     * @param query the SPARQL query text to execute
     */
    public QueryRunner(Individual ind, String query) {
        this.ind = ind;
        this.query = query;
    }

    /**
     * @return the text collected by the query so far; empty until the thread has run
     */
    public String getPropertyValues() {
        return propertyValues.toString();
    }

    /** Executes the query and stores its flattened result. */
    @Override
    public void run() {
        propertyValues.append(runQuery(ind, query));
    }
}
}

View file

@ -0,0 +1,15 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import org.apache.solr.common.SolrInputDocument;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
/**
 * Contract for an object that can add to a SolrInputDocument.
 */
public interface DocumentModifier {

    /**
     * Adds to the given Solr document on behalf of one individual.
     *
     * @param individual the individual the document is being built for
     * @param doc        the document to modify in place
     * @param addUri     buffer supplied by the caller.
     *                   NOTE(review): the indexer in this change set appears to fill it
     *                   with the URIs of the individual's object-property values; confirm
     *                   what implementations are expected to do with it
     */
    void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri);
}

View file

@ -2,46 +2,330 @@
package edu.cornell.mannlib.vitro.webapp.search.solr; package edu.cornell.mannlib.vitro.webapp.search.solr;
import org.apache.solr.common.SolrDocument; import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.document.Document;
import org.apache.solr.common.SolrInputDocument;
import org.joda.time.DateTime;
import com.hp.hpl.jena.vocabulary.OWL;
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException; import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc; import edu.cornell.mannlib.vitro.webapp.search.beans.ClassProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
public class IndividualToSolrDocument implements Obj2DocIface { public class IndividualToSolrDocument {
protected LuceneDocToSolrDoc luceneToSolr; public static final Log log = LogFactory.getLog(IndividualToSolrDocument.class.getName());
protected Entity2LuceneDoc entityToLucene;
public IndividualToSolrDocument(Entity2LuceneDoc e2d){ public static VitroTermNames term = new VitroTermNames();
entityToLucene = e2d;
luceneToSolr = new LuceneDocToSolrDoc(); private static String entClassName = Individual.class.getName();
private ClassProhibitedFromSearch classesProhibitedFromSearch;
private IndividualProhibitedFromSearch individualProhibitedFromSearch;
public List<DocumentModifier> documentModifiers = new ArrayList<DocumentModifier>();
private static List<String> contextNodeClassNames = new ArrayList<String>();
public IndividualToSolrDocument(
ClassProhibitedFromSearch classesProhibitedFromSearch,
IndividualProhibitedFromSearch individualProhibitedFromSearch){
this( classesProhibitedFromSearch,
individualProhibitedFromSearch,
Collections.EMPTY_LIST);
} }
@Override public IndividualToSolrDocument(
public boolean canTranslate(Object obj) { ClassProhibitedFromSearch classesProhibitedFromSearch,
return obj != null && obj instanceof Individual; IndividualProhibitedFromSearch individualProhibitedFromSearch,
List<DocumentModifier> docModifiers){
this.classesProhibitedFromSearch = classesProhibitedFromSearch;
this.individualProhibitedFromSearch = individualProhibitedFromSearch;
this.documentModifiers = docModifiers;
fillContextNodes();
} }
@Override @SuppressWarnings("static-access")
public boolean canUnTranslate(Object result) { public SolrInputDocument translate(Individual ind) throws IndexingException{
return result != null && result instanceof SolrDocument; long tProhibited = System.currentTimeMillis();
ArrayList<String> superClassNames = null;
StringBuffer addUri = null;
String value;
StringBuffer classPublicNames = new StringBuffer();
classPublicNames.append("");
SolrInputDocument doc = new SolrInputDocument();
//DocId
String id = ind.getURI();
log.debug("translating " + id);
if(id == null){
log.debug("cannot add individuals without URIs to lucene Index");
return null;
}else if( id.startsWith(VitroVocabulary.vitroURI) ||
id.startsWith(VitroVocabulary.VITRO_PUBLIC) ||
id.startsWith(VitroVocabulary.PSEUDO_BNODE_NS) ||
id.startsWith(OWL.NS)){
log.debug("not indexing because of namespace:" + id);
return null;
} }
@Override //filter out class groups, owl:ObjectProperties etc..
if(individualProhibitedFromSearch.isIndividualProhibited(id)){
return null;
}
log.debug("time to check if individual is prohibited:" + Long.toString(System.currentTimeMillis() - tProhibited));
// Types and classgroups
boolean prohibited = false;
List<VClass> vclasses = ind.getVClasses(false);
superClassNames = new ArrayList<String>();
String superLclName = null;
long tClassgroup = System.currentTimeMillis();
for(VClass clz : vclasses){
superLclName = clz.getLocalName();
superClassNames.add(superLclName);
if(clz.getURI() == null){
continue;
}else if(OWL.Thing.getURI().equals(clz.getURI())){
//index individuals of type owl:Thing, just don't add owl:Thing as the type field in the index
continue;
} else if(clz.getURI().startsWith(OWL.NS)){
log.debug("not indexing " + id + " because of type " + clz.getURI());
return null;
} else if(contextNodeClassNames.contains(superLclName)) { // check to see if context node is being indexed.
return null;
}
else {
if( !prohibited && classesProhibitedFromSearch.isClassProhibitedFromSearch(clz.getURI()))
prohibited = true;
if( clz.getSearchBoost() != null)
doc.setDocumentBoost(doc.getDocumentBoost() + clz.getSearchBoost());
doc.addField(term.RDFTYPE, clz.getURI());
if(clz.getLocalName() != null){
doc.addField(term.CLASSLOCALNAME, clz.getLocalName());
doc.addField(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase());
}
if(clz.getName() != null){
classPublicNames.append(" ");
classPublicNames.append(clz.getName());
}
//Classgroup URI
if(clz.getGroupURI() != null){
doc.addField(term.CLASSGROUP_URI,clz.getGroupURI());
}
}
}
if(superClassNames.isEmpty()){
return null;
}
log.debug("time to check if class is prohibited and adding classes, classgroups and type to the index: " + Long.toString(System.currentTimeMillis() - tClassgroup));
doc.addField(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0");
//lucene DocID
doc.addField(term.DOCID, entClassName + id);
//vitro id
doc.addField(term.URI, id);
//java class
doc.addField(term.JCLASS, entClassName);
//Individual Label
if(ind.getRdfsLabel() != null)
value = ind.getRdfsLabel();
else{
log.debug("Using local name for individual with rdfs:label " + ind.getURI());
value = ind.getLocalName();
}
// collecting object property statements
String uri = ind.getURI();
StringBuffer objectNames = new StringBuffer();
objectNames.append("");
String t=null;
addUri = new StringBuffer();
addUri.append("");
List<ObjectPropertyStatement> objectPropertyStatements = ind.getObjectPropertyStatements();
if (objectPropertyStatements != null) {
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
while (objectPropertyStmtIter.hasNext()) {
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
continue;
try {
objectNames.append(" ");
objectNames.append(((t=objectPropertyStmt.getObject().getName()) == null)?"":t);
addUri.append(" ");
addUri.append(((t=objectPropertyStmt.getObject().getURI()) == null)?"":t);
} catch (Exception e) {
log.debug("could not index name of related object: " + e.getMessage());
}
}
}
if(documentModifiers == null || documentModifiers.isEmpty()){
doc.addField(term.NAME_RAW, value, NAME_BOOST);
doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST);
doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST);
doc.addField(term.NAME_STEMMED, value, NAME_BOOST);
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
}else{
doc.addField(term.NAME_RAW, value);
doc.addField(term.NAME_LOWERCASE, value.toLowerCase());
doc.addField(term.NAME_UNSTEMMED, value);
doc.addField(term.NAME_STEMMED, value);
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
}
long tMoniker = System.currentTimeMillis();
if(documentModifiers == null || documentModifiers.isEmpty()){
//boost for entity
if(ind.getSearchBoost() != null && ind.getSearchBoost() != 0)
doc.setDocumentBoost(ind.getSearchBoost());
}
//thumbnail
try{
value = null;
if(ind.hasThumb())
doc.addField(term.THUMBNAIL, "1");
else
doc.addField(term.THUMBNAIL, "0");
}catch(Exception ex){
log.debug("could not index thumbnail: " + ex);
}
//time of indexing, in milliseconds past the epoch
Object anon[] = { new Long((new DateTime() ).getMillis()) };
doc.addField(term.INDEXEDTIME, String.format("%019d", anon));
log.debug("time to include thumbnail and indexedtime in the index: " + Long.toString(System.currentTimeMillis() - tMoniker));
long tPropertyStatements = System.currentTimeMillis();
//collecting data property statements
if(!prohibited){
//ALLTEXT, all of the 'full text'
StringBuffer allTextValue = new StringBuffer();
allTextValue.append("");
allTextValue.append(" ");
allTextValue.append(((t=ind.getName()) == null)?"":t);
allTextValue.append(" ");
allTextValue.append(((t=ind.getAnchor()) == null)?"":t);
allTextValue.append(" ");
allTextValue.append(classPublicNames.toString());
List<DataPropertyStatement> dataPropertyStatements = ind.getDataPropertyStatements();
if (dataPropertyStatements != null) {
Iterator<DataPropertyStatement> dataPropertyStmtIter = dataPropertyStatements.iterator();
while (dataPropertyStmtIter.hasNext()) {
DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next();
allTextValue.append(" ");
allTextValue.append(((t=dataPropertyStmt.getData()) == null)?"":t);
}
}
allTextValue.append(objectNames.toString());
log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements));
String alltext = allTextValue.toString();
doc.addField(term.ALLTEXT, alltext);
doc.addField(term.ALLTEXTUNSTEMMED, alltext);
doc.addField(term.ALLTEXT_PHONETIC, alltext,PHONETIC_BOOST);
//run the document modifiers
if( documentModifiers != null && !documentModifiers.isEmpty()){
doc.addField(term.targetInfo,"");
for(DocumentModifier modifier: documentModifiers){
modifier.modifyDocument(ind, doc, addUri);
}
}
}
return doc;
}
public Object getIndexId(Object obj) { public Object getIndexId(Object obj) {
throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented"); throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented");
} }
@Override public Individual unTranslate(Object result) {
public Object translate(Object obj) throws IndexingException { Individual ent = null;
return luceneToSolr.translate( entityToLucene.translate( obj ) ); if( result != null && result instanceof Document){
Document hit = (Document) result;
String id = hit.get(term.URI);
ent = new IndividualImpl();
ent.setURI(id);
}
return ent;
} }
@Override private void fillContextNodes(){
public Object unTranslate(Object result) { this.contextNodeClassNames.add("Role");
return luceneToSolr.unTranslate( result ); this.contextNodeClassNames.add("AttendeeRole");
this.contextNodeClassNames.add("ClinicalRole");
this.contextNodeClassNames.add("LeaderRole");
this.contextNodeClassNames.add("MemberRole");
this.contextNodeClassNames.add("OutreachProviderRole");
this.contextNodeClassNames.add("PresenterRole");
this.contextNodeClassNames.add("ResearcherRole");
this.contextNodeClassNames.add("InvestigatorRole");
this.contextNodeClassNames.add("CoPrincipalInvestigatorRole");
this.contextNodeClassNames.add("PrincipalInvestigatorRole");
this.contextNodeClassNames.add("ServiceProviderRole");
this.contextNodeClassNames.add("TeacherRole");
this.contextNodeClassNames.add("Position");
this.contextNodeClassNames.add("FacultyAdministrativePosition");
this.contextNodeClassNames.add("FacultyPosition");
this.contextNodeClassNames.add("LibrarianPosition");
this.contextNodeClassNames.add("Non-AcademicPosition");
this.contextNodeClassNames.add("Non-FacultyAcademicPosition");
this.contextNodeClassNames.add("PostdoctoralPosition");
this.contextNodeClassNames.add("AdvisingRelationship");
this.contextNodeClassNames.add("Authorship");
this.contextNodeClassNames.add("AcademicDegree");
} }
public static float NAME_BOOST = 2.0F;
public static float ALL_TEXT_BOOST = 2.5F;
public static float PHONETIC_BOOST = 0.1F;
} }

View file

@ -1,62 +0,0 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
/**
 * Translates a Lucene {@code Document} into a {@code SolrInputDocument}, and
 * turns a {@code SolrDocument} search hit back into an {@code Individual}
 * that carries only its URI.
 */
public class LuceneDocToSolrDoc implements Obj2DocIface {

    /** @return true if {@code obj} is a Lucene {@code Document}; false for null */
    @Override
    public boolean canTranslate(Object obj) {
        // instanceof is already false for null, so no separate null check is needed
        return obj instanceof Document;
    }

    /** @return true if {@code result} is a {@code SolrDocument}; false for null */
    @Override
    public boolean canUnTranslate(Object result) {
        return result instanceof SolrDocument;
    }

    /** Not meaningful for Solr; always returns null. */
    @Override
    public Object getIndexId(Object obj) {
        //"this method isn't useful for solr"
        return null;
    }

    /**
     * Copies the name and string value of every field of a Lucene document
     * into a new Solr input document.
     *
     * @param obj a Lucene {@code Document}; anything else fails the cast
     * @return a new {@code SolrInputDocument} holding the copied fields
     */
    @Override
    public Object translate(Object obj) throws IndexingException {
        Document luceneDoc = (Document) obj;
        SolrInputDocument solrDoc = new SolrInputDocument();
        for (Object f : luceneDoc.getFields()) {
            Field field = (Field) f;
            // Strings are immutable, so the field name can be passed without copying it.
            solrDoc.addField(field.name(), field.stringValue());
        }
        return solrDoc;
    }

    /**
     * Builds an individual from a Solr search hit.
     *
     * @param result a {@code SolrDocument}, or anything else
     * @return a new individual whose URI is the hit's URI field value, or
     *         null if {@code result} is not a {@code SolrDocument}
     */
    @Override
    public Object unTranslate(Object result) {
        if (!(result instanceof SolrDocument)) {
            return null;
        }
        SolrDocument hit = (SolrDocument) result;
        String id = (String) hit.getFieldValue(Entity2LuceneDoc.term.URI);
        Individual ind = new IndividualImpl();
        ind.setURI(id);
        return ind;
    }
}

View file

@ -6,37 +6,36 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator;
import java.util.List; import java.util.List;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument; import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException; import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface;
import edu.cornell.mannlib.vitro.webapp.search.solr.CalculateParameters;
public class SolrIndexer implements IndexerIface { public class SolrIndexer implements IndexerIface {
private final static Log log = LogFactory.getLog(SolrIndexer.class); private final static Log log = LogFactory.getLog(SolrIndexer.class);
protected SolrServer server; protected SolrServer server;
protected boolean indexing; protected boolean indexing;
protected List<Obj2DocIface> obj2DocList;
protected HashSet<String> urisIndexed; protected HashSet<String> urisIndexed;
protected IndividualToSolrDocument individualToSolrDoc;
public SolrIndexer( SolrServer server, List<Obj2DocIface> o2d){ public SolrIndexer( SolrServer server, IndividualToSolrDocument indToDoc){
this.server = server; this.server = server;
this.obj2DocList = o2d; this.individualToSolrDoc = indToDoc;
} }
@Override @Override
public synchronized void index(Individual ind, boolean newDoc) throws IndexingException { public void index(Individual ind) throws IndexingException {
if( ! indexing ) if( ! indexing )
throw new IndexingException("SolrIndexer: must call " + throw new IndexingException("SolrIndexer: must call " +
"startIndexing() before index()."); "startIndexing() before index().");
@ -49,38 +48,31 @@ public class SolrIndexer implements IndexerIface {
log.debug("already indexed " + ind.getURI() ); log.debug("already indexed " + ind.getURI() );
return; return;
}else{ }else{
SolrInputDocument solrDoc = null;
synchronized(this){
urisIndexed.add(ind.getURI()); urisIndexed.add(ind.getURI());
}
log.debug("indexing " + ind.getURI()); log.debug("indexing " + ind.getURI());
Iterator<Obj2DocIface> it = getObj2DocList().iterator(); // synchronized(individualToSolrDoc){
while (it.hasNext()) { solrDoc = individualToSolrDoc.translate(ind);
Obj2DocIface obj2doc = (Obj2DocIface) it.next(); // }
if (obj2doc.canTranslate(ind)) {
SolrInputDocument solrDoc = (SolrInputDocument) obj2doc.translate(ind);
if( solrDoc != null){ if( solrDoc != null){
//sending each doc individually is inefficient //sending each doc individually is inefficient
Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(); // Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
docs.add( solrDoc ); // docs.add( solrDoc );
server.add( docs ); UpdateResponse res = server.add( solrDoc );
// if( !newDoc ){ log.debug("response after adding docs to server: "+ res);
// server.add( docs );
// log.debug("updated " + ind.getName() + " " + ind.getURI());
// }else{
// server.add( docs );
// log.debug("added " + ind.getName() + " " + ind.getURI());
// }
}else{ }else{
log.debug("removing from index " + ind.getURI()); log.debug("removing from index " + ind.getURI());
//TODO: how do we delete document?
//writer.deleteDocuments((Term)obj2doc.getIndexId(ind)); //writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
} }
} }
}
}
} catch (IOException ex) { } catch (IOException ex) {
throw new IndexingException(ex.getMessage()); throw new IndexingException(ex.getMessage());
} catch (SolrServerException ex) { } catch (SolrServerException ex) {
throw new IndexingException(ex.getMessage()); throw new IndexingException(ex.getMessage());
} }
} }
@Override @Override
@ -115,12 +107,12 @@ public class SolrIndexer implements IndexerIface {
public synchronized void addObj2Doc(Obj2DocIface o2d) { public synchronized void addObj2Doc(Obj2DocIface o2d) {
if (o2d != null) //no longer used
obj2DocList.add(o2d);
} }
public synchronized List<Obj2DocIface> getObj2DocList() { public synchronized List<Obj2DocIface> getObj2DocList() {
return obj2DocList; //no longer used
return null;
} }
@Override @Override
@ -131,11 +123,21 @@ public class SolrIndexer implements IndexerIface {
@Override @Override
public synchronized void endIndexing() { public synchronized void endIndexing() {
try { try {
server.commit(); UpdateResponse res = server.commit();
} catch (Exception e) { log.debug("Response after committing to server: "+ res );
} catch (SolrServerException e) {
log.error("Could not commit to solr server", e); log.error("Could not commit to solr server", e);
} catch(IOException e){
log.error("Could not commit to solr server", e);
}finally{
if(!individualToSolrDoc.documentModifiers.isEmpty()){
if(individualToSolrDoc.documentModifiers.get(0) instanceof CalculateParameters){
CalculateParameters c = (CalculateParameters) individualToSolrDoc.documentModifiers.get(0);
c.clearMap();
log.info("BetaMap cleared");
}
}
} }
try { try {
server.optimize(); server.optimize();
} catch (Exception e) { } catch (Exception e) {

View file

@ -14,6 +14,7 @@ import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.query.Dataset;
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties; import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary; import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
@ -21,14 +22,14 @@ import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering; import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils; import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters; import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
import edu.cornell.mannlib.vitro.webapp.dao.jena.JenaBaseDao;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener; import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.dao.jena.WebappDaoFactoryJena;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl;
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface; import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup; import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup; import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
@ -57,8 +58,8 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
} }
CommonsHttpSolrServer server; CommonsHttpSolrServer server;
server = new CommonsHttpSolrServer( solrServerUrl ); server = new CommonsHttpSolrServer( solrServerUrl );
server.setSoTimeout(1000); // socket read timeout server.setSoTimeout(10000); // socket read timeout
server.setConnectionTimeout(100); server.setConnectionTimeout(10000);
server.setDefaultMaxConnectionsPerHost(100); server.setDefaultMaxConnectionsPerHost(100);
server.setMaxTotalConnections(100); server.setMaxTotalConnections(100);
server.setMaxRetries(1); server.setMaxRetries(1);
@ -67,15 +68,24 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
/* setup the individual to solr doc translation */ /* setup the individual to solr doc translation */
//first we need a ent2luceneDoc translator //first we need a ent2luceneDoc translator
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel"); OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc(
OntModel abox = ModelContext.getBaseOntModelSelector(context).getABoxModel();
OntModel inferences = (OntModel)context.getAttribute( JenaBaseDao.INFERENCE_ONT_MODEL_ATTRIBUTE_NAME);
Dataset dataset = WebappDaoFactoryJena.makeInMemoryDataset(abox, inferences);
List<DocumentModifier> modifiers = new ArrayList<DocumentModifier>();
// modifiers.add(new CalculateParameters(ModelContext.getJenaOntModel(context)));
modifiers.add(new CalculateParameters(dataset));
modifiers.add(new ContextNodeFields(dataset));
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument(
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
new IndividualProhibitedFromSearch(context) ); new IndividualProhibitedFromSearchImpl(context),
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc ); modifiers);
List<Obj2DocIface> o2d = new ArrayList<Obj2DocIface>();
o2d.add(indToSolrDoc);
/* setup solr indexer */ /* setup solr indexer */
SolrIndexer solrIndexer = new SolrIndexer(server, o2d); SolrIndexer solrIndexer = new SolrIndexer(server, indToSolrDoc);
if( solrIndexer.isIndexEmpty() ){ if( solrIndexer.isIndexEmpty() ){
log.info("solr index is empty, requesting rebuild"); log.info("solr index is empty, requesting rebuild");
sce.getServletContext().setAttribute(LuceneSetup.INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE); sce.getServletContext().setAttribute(LuceneSetup.INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE);

View file

@ -41,8 +41,8 @@ public class ProhibitedFromSearchTest {
ProhibitedFromSearch pfs = new ProhibitedFromSearch( SEARCH_CONFIG_URI , m); ProhibitedFromSearch pfs = new ProhibitedFromSearch( SEARCH_CONFIG_URI , m);
Assert.assertNotNull(pfs.prohibitedClasses); Assert.assertNotNull(pfs.prohibitedClasses);
Assert.assertTrue(pfs.prohibitedClasses.size() == 4); Assert.assertTrue(pfs.prohibitedClasses.size() == 4);
Assert.assertTrue(pfs.isClassProhibited(TEST_CLASS)); Assert.assertTrue(pfs.isClassProhibitedFromSearch(TEST_CLASS));
Assert.assertTrue(!pfs.isClassProhibited("http://someOtherClass.com/test")); Assert.assertTrue(!pfs.isClassProhibitedFromSearch("http://someOtherClass.com/test"));
} }
@Test @Test
@ -55,8 +55,8 @@ public class ProhibitedFromSearchTest {
ProhibitedFromSearch pfs = new ProhibitedFromSearch( "http://NotFound.com/inModel", m); ProhibitedFromSearch pfs = new ProhibitedFromSearch( "http://NotFound.com/inModel", m);
Assert.assertNotNull(pfs.prohibitedClasses); Assert.assertNotNull(pfs.prohibitedClasses);
Assert.assertTrue(pfs.prohibitedClasses.size() == 0); Assert.assertTrue(pfs.prohibitedClasses.size() == 0);
Assert.assertTrue(!pfs.isClassProhibited(TEST_CLASS)); Assert.assertTrue(!pfs.isClassProhibitedFromSearch(TEST_CLASS));
Assert.assertTrue(!pfs.isClassProhibited("http://someOtherClass.com/test")); Assert.assertTrue(!pfs.isClassProhibitedFromSearch("http://someOtherClass.com/test"));
} }
@ -122,7 +122,7 @@ public class ProhibitedFromSearchTest {
ProhibitedFromSearch pfs = new ProhibitedFromSearch( DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, m); ProhibitedFromSearch pfs = new ProhibitedFromSearch( DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, m);
Assert.assertNotNull(pfs.prohibitedClasses); Assert.assertNotNull(pfs.prohibitedClasses);
Assert.assertEquals(1, pfs.prohibitedClasses.size() ); Assert.assertEquals(1, pfs.prohibitedClasses.size() );
Assert.assertTrue(pfs.isClassProhibited("http://vivoweb.org/ontology/core#NonAcademic")); Assert.assertTrue(pfs.isClassProhibitedFromSearch("http://vivoweb.org/ontology/core#NonAcademic"));
Assert.assertTrue(!pfs.isClassProhibited("http://someOtherClass.com/test")); Assert.assertTrue(!pfs.isClassProhibitedFromSearch("http://someOtherClass.com/test"));
} }
} }