Committing changes with merge from search relevance branch.

This commit is contained in:
anupsawant 2011-06-23 15:21:33 +00:00
commit 692446335e
31 changed files with 48715 additions and 2514 deletions

View file

@ -24,6 +24,9 @@ public class DisplayVocabulary {
/* Individuals */
public static final String PRIMARY_LUCENE_INDEX_URI = NS + "PrimaryLuceneIndex";
//bk392 for extracting properties beyond context nodes.
public static final String CONTEXT_NODES_URI = NS + "QueryForContextNodes";
/* Page types */
public static final String PAGE_TYPE = NS + "Page";
public static final String HOME_PAGE_TYPE = NS + "HomePage";
@ -37,8 +40,10 @@ public class DisplayVocabulary {
/* Data Properties */
public static final DatatypeProperty URL_MAPPING = m_model.createDatatypeProperty(NS + "urlMapping");
public static final String TITLE = NS + "title";
public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate");
public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate");
//bk392 for extracting properties beyond context nodes.
public static final DatatypeProperty QUERY_FOR_EDUCATIONAL_TRAINING = m_model.createDatatypeProperty(NS + "queryForEducationalTraining");
/* URIs for storing menu.n3 */
public static final String MENU_TEXT_RES = NS + "MenuText";
public static final String HAS_TEXT_REPRESENTATION = NS + "hasMenuText";

View file

@ -780,7 +780,7 @@ public class IndividualJena extends IndividualImpl implements Individual {
Statement stmt = stmtIt.nextStatement();
if (stmt.getObject().isURIResource()) {
String typeURI = ((Resource)stmt.getObject()).getURI();
if (pfs.isClassProhibited(typeURI)) {
if (pfs.isClassProhibitedFromSearch(typeURI)) {
return true;
}
}

View file

@ -1069,7 +1069,7 @@ public class IndividualSDB extends IndividualImpl implements Individual {
while(itr.hasNext()) {
String typeURI = itr.next().getURI();
if (pfs.isClassProhibited(typeURI)) {
if (pfs.isClassProhibitedFromSearch(typeURI)) {
return true;
}
}

View file

@ -324,7 +324,7 @@ public class VClassGroupDaoJena extends JenaBaseDao implements VClassGroupDao {
for (VClassGroup group : groups) {
List<VClass> classList = new ArrayList<VClass>();
for (VClass vclass : group.getVitroClassList()) {
if (!pfs.isClassProhibited(vclass.getURI())) {
if (!pfs.isClassProhibitedFromSearch(vclass.getURI())) {
classList.add(vclass);
}
}

View file

@ -0,0 +1,66 @@
package edu.cornell.mannlib.vitro.webapp.search;
/**
 * Names of the fields (terms) used in the search index documents.
 *
 * <p>Every name is a constant. They are all declared {@code final} so that no
 * code can reassign a field name at runtime and silently make indexing and
 * querying disagree about which field they are talking about.
 */
public class VitroTermNames {

    /** Id of entity, vclass or tab */
    public static final String URI = "URI";

    /** lucene document id */
    public static final String DOCID = "DocId";

    /** java class of the object that the Doc represents. */
    public static final String JCLASS = "JCLASS";

    /** rdf:type */
    public static final String RDFTYPE = "type";

    /** URI(s) of the class group(s) the document belongs to */
    public static final String CLASSGROUP_URI = "classgroup";

    /** Modtime from db */
    public static final String MODTIME = "modTime";

    /** time of index in msec since epoch */
    public static final String INDEXEDTIME = "indexedTime";

    /** timekey of entity in yyyymmddhhmm */
    public static final String TIMEKEY = "TIMEKEY";

    /** time of sunset/end of entity in yyyymmddhhmm */
    public static final String SUNSET = "SUNSET";

    /** time of sunrise/start of entity in yyyymmddhhmm */
    public static final String SUNRISE = "SUNRISE";

    /** entity's moniker */
    public static final String MONIKER = "moniker";

    /** text for 'full text' search, this is stemmed */
    public static final String ALLTEXT = "ALLTEXT";

    /**
     * text for 'full text' search, this is unstemmed for
     * use with wildcards and prefix queries
     */
    public static final String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";

    /** class name for storing targeted information */
    public static final String targetInfo = "targetInfo";

    /** keywords */
    public static final String KEYWORDS = "KEYWORDS";

    /** Does the individual have a thumbnail image? 1=yes 0=no */
    public static final String THUMBNAIL = "THUMBNAIL";

    /** Should individual be included in full text search results? 1=yes 0=no */
    public static final String PROHIBITED_FROM_TEXT_RESULTS = "PROHIBITED_FROM_TEXT_RESULTS";

    /** class names in human readable form of an individual (presumably lowercased, going by the name - TODO confirm) */
    public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase";

    /** class names in human readable form of an individual */
    public static final String CLASSLOCALNAME = "classLocalName";

    // Fields derived from rdfs:label

    /** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming */
    public static final String NAME_RAW = "nameRaw"; // was NAMERAW

    /** rdfs:label lowercased, no tokenizing, no stop words, no stemming */
    public static final String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE

    /** rdfs:label lowercased, tokenized, stop words, no stemming */
    public static final String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED

    /** rdfs:label lowercased, tokenized, stop words, stemmed */
    public static final String NAME_STEMMED = "nameStemmed"; // was NAME

    /** field for beta values of all documents */
    public static final String BETA = "BETA";

    /** NOTE(review): not documented in the original source - TODO describe what PHI holds */
    public static final String PHI = "PHI";

    /** NOTE(review): not documented in the original source - TODO describe what ADJACENT_NODES holds */
    public static final String ADJACENT_NODES = "ADJACENT_NODES";

    /** phonetic variant of the full-text field */
    public static final String ALLTEXT_PHONETIC = "ALLTEXT_PHONETIC";

    /** phonetic variant of the name field */
    public static final String NAME_PHONETIC = "NAME_PHONETIC";
}

View file

@ -0,0 +1,5 @@
package edu.cornell.mannlib.vitro.webapp.search.beans;
/**
 * Answers whether a class, identified by its URI, is prohibited from search.
 */
public interface ClassProhibitedFromSearch {

    /**
     * @param classUri URI of the class to check
     * @return {@code true} if the class is prohibited from search
     */
    boolean isClassProhibitedFromSearch(String classUri);

}

View file

@ -1,67 +1,5 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.beans;
import javax.servlet.ServletContext;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.shared.Lock;
import com.hp.hpl.jena.vocabulary.OWL;
import com.hp.hpl.jena.vocabulary.RDF;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
public class IndividualProhibitedFromSearch {
protected OntModel fullModel;
protected static Log log = LogFactory.getLog(IndividualProhibitedFromSearch.class);
public IndividualProhibitedFromSearch( ServletContext context ){
this.fullModel = ModelContext.getUnionOntModelSelector(context).getFullModel();
}
public boolean isIndividualProhibited(String uri){
if( uri == null || uri.isEmpty() )
return true;
boolean prohibited = false;
try {
fullModel.getLock().enterCriticalSection(Lock.READ);
Query query = makeAskQueryForUri( uri );
prohibited = QueryExecutionFactory.create( query, fullModel).execAsk();
} finally {
fullModel.getLock().leaveCriticalSection();
}
if( prohibited )
log.debug("prohibited " + uri);
return prohibited;
}
private Query makeAskQueryForUri( String uri ){
String queryString =
"PREFIX fn: <http://www.w3.org/2005/xpath-functions#> \n" +
"ASK { \n" +
" <"+uri+"> <" + RDF.type.getURI() + "> ?type . \n" +
" FILTER ( \n" +
" ( fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "\" ) \n" +
" && \n"+
" ! fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "Flag\" ) ) || \n" +
" fn:starts-with( str(?type), \"" + VitroVocabulary.PUBLIC + "\" ) || \n" +
" str(?type) = \"" + OWL.ObjectProperty.getURI() + "\" || \n" +
" str(?type) = \"" + OWL.DatatypeProperty.getURI() + "\" || \n" +
" str(?type) = \"" + OWL.AnnotationProperty.getURI() + "\" \n" +
" )\n" +
"}" ;
return QueryFactory.create( queryString );
}
/**
 * Answers whether an individual, identified by its URI, is prohibited from search.
 */
public interface IndividualProhibitedFromSearch {

    /**
     * @param uri URI of the individual to check
     * @return {@code true} if the individual is prohibited
     */
    boolean isIndividualProhibited(String uri);

}

View file

@ -0,0 +1,70 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.beans;
import javax.servlet.ServletContext;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.shared.Lock;
import com.hp.hpl.jena.vocabulary.OWL;
import com.hp.hpl.jena.vocabulary.RDF;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
public class IndividualProhibitedFromSearchImpl implements IndividualProhibitedFromSearch {
protected OntModel fullModel;
protected static Log log = LogFactory.getLog(IndividualProhibitedFromSearchImpl.class);
public IndividualProhibitedFromSearchImpl( ServletContext context ){
this.fullModel = ModelContext.getUnionOntModelSelector(context).getFullModel();
}
public IndividualProhibitedFromSearchImpl( OntModel fullModel ){
this.fullModel = fullModel;
}
public boolean isIndividualProhibited(String uri){
if( uri == null || uri.isEmpty() )
return true;
boolean prohibited = false;
try {
fullModel.getLock().enterCriticalSection(Lock.READ);
Query query = makeAskQueryForUri( uri );
prohibited = QueryExecutionFactory.create( query, fullModel).execAsk();
} finally {
fullModel.getLock().leaveCriticalSection();
}
if( prohibited )
log.debug("prohibited " + uri);
return prohibited;
}
private Query makeAskQueryForUri( String uri ){
String queryString =
"PREFIX fn: <http://www.w3.org/2005/xpath-functions#> \n" +
"ASK { \n" +
" <"+uri+"> <" + RDF.type.getURI() + "> ?type . \n" +
" FILTER ( \n" +
" ( fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "\" ) \n" +
" && \n"+
" ! fn:starts-with( str(?type), \"" + VitroVocabulary.vitroURI + "Flag\" ) ) || \n" +
" fn:starts-with( str(?type), \"" + VitroVocabulary.PUBLIC + "\" ) || \n" +
" str(?type) = \"" + OWL.ObjectProperty.getURI() + "\" || \n" +
" str(?type) = \"" + OWL.DatatypeProperty.getURI() + "\" || \n" +
" str(?type) = \"" + OWL.AnnotationProperty.getURI() + "\" \n" +
" )\n" +
"}" ;
return QueryFactory.create( queryString );
}
}

View file

@ -25,7 +25,7 @@ import com.hp.hpl.jena.shared.Lock;
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
public class ProhibitedFromSearch {
public class ProhibitedFromSearch implements ClassProhibitedFromSearch{
List<String> prohibitedClasses;
String ProhibitedFromSearchURI;
@ -41,7 +41,7 @@ public class ProhibitedFromSearch {
model.register(new ProhibitedFromSearchChangeListener( this ));
}
public synchronized boolean isClassProhibited(String classURI){
public synchronized boolean isClassProhibitedFromSearch(String classURI){
if( classURI != null ){
boolean p = prohibitedClasses.contains(classURI);
log.debug( classURI + " is " + (p?"prohibited":"not prohibited"));

View file

@ -49,6 +49,7 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.VitroHighlighter;
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery;
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel;
@ -177,6 +178,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
SolrQuery query = getQuery(qtxt, maxHitCount, vreq);
SolrServer solr = SolrSetup.getSolrServer(getServletContext());
QueryResponse response = null;
try {
response = solr.query(query);
@ -349,11 +351,11 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
for(int i=0; i<hitCount && n > grpsFound ;i++){
try{
SolrDocument doc = docs.get(i);
Collection<Object> grps = doc.getFieldValues(VitroLuceneTermNames.CLASSGROUP_URI);
Collection<Object> grps = doc.getFieldValues(VitroTermNames.CLASSGROUP_URI);
if (grps != null) {
for (Object o : grps) {
String groupUri = o.toString();
if( groupUri != null && ! classGroupsInHits.contains(groupUri)){
if( groupUri != null && !classGroupsInHits.contains(groupUri)){
classGroupsInHits.add(groupUri);
grpsFound++;
if( grpsFound >= n )
@ -364,6 +366,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
} catch(Exception e) {
log.error("problem getting VClassGroups from search hits "
+ e.getMessage() );
e.printStackTrace();
}
}

View file

@ -9,8 +9,11 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Stack;
import java.util.Queue;
import javax.servlet.ServletContext;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -21,6 +24,8 @@ import edu.cornell.mannlib.vitro.webapp.dao.VClassDao;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
import edu.cornell.mannlib.vitro.webapp.search.solr.CalculateParameters;
/**
* The IndexBuilder is used to rebuild or update a search index.
@ -282,16 +287,41 @@ public class IndexBuilder extends Thread {
* @throws AbortIndexing
*/
private void indexForSource(Iterator<Individual> individuals , boolean newDocs) throws AbortIndexing{
long starttime = System.currentTimeMillis();
long count = 0;
// long starttime = System.currentTimeMillis();
int count = 0;
int numOfThreads = 10;
List<IndexWorkerThread> workers = new ArrayList<IndexWorkerThread>();
boolean distributing = true;
for(int i = 0; i< numOfThreads ;i++){
workers.add(new IndexWorkerThread(indexer,i,distributing)); // made a pool of workers
}
log.info("Indexing worker pool ready for indexing.");
// starting worker threads
for(int i =0; i < numOfThreads; i++){
workers.get(i).start();
}
while(individuals.hasNext()){
if( stopRequested )
throw new AbortIndexing();
Individual ind = null;
try{
ind = individuals.next();
indexer.index(ind, newDocs);
ind = individuals.next();
//indexer.index(ind);
workers.get(count%numOfThreads).addToQueue(ind); // adding individual to worker queue.
}catch(Throwable ex){
if( stopRequested || log == null){//log might be null if system is shutting down.
throw new AbortIndexing();
@ -300,20 +330,33 @@ public class IndexBuilder extends Thread {
log.warn("Error indexing individual " + uri + " " + ex.getMessage());
}
count++;
if( log.isDebugEnabled() ){
/* if( log.isDebugEnabled() ){
if( (count % 100 ) == 0 && count > 0 ){
long dt = (System.currentTimeMillis() - starttime);
log.debug("individuals indexed: " + count + " in " + dt + " msec " +
" time pre individual = " + (dt / count) + " msec" );
}
}
} */
}
log.info(
for(int i =0 ; i < numOfThreads; i ++){
workers.get(i).setDistributing(false);
}
for(int i =0; i < numOfThreads; i++){
try{
workers.get(i).join();
}catch(InterruptedException e){
log.error(e,e);
}
}
/* log.info(
"individuals indexed: " + count + " in " + (System.currentTimeMillis() - starttime) + " msec" +
(count!=0?(" time per individual = " + (System.currentTimeMillis() - starttime)/ count + " msec"):"")
);
);*/
}
/**
* For a list of individuals, this builds a list of dependent resources and returns it.
@ -387,5 +430,7 @@ public class IndexBuilder extends Thread {
private class AbortIndexing extends Exception {
// Just a vanilla exception
}
}
}

View file

@ -0,0 +1,97 @@
package edu.cornell.mannlib.vitro.webapp.search.indexing;
import java.util.LinkedList;
import java.util.Queue;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.solr.IndividualToSolrDocument;
/**
 * Worker thread that indexes the individuals handed to it through a private queue.
 *
 * <p>A producer calls {@link #addToQueue(Individual)} to hand over work and
 * {@link #setDistributing(boolean) setDistributing(false)} once no more work will
 * arrive. The worker then indexes whatever is still queued and terminates, so a
 * producer that afterwards {@code join()}s the worker knows all queued individuals
 * have been passed to the indexer.
 *
 * <p>The queue monitor is only held while the queue itself is touched, never while
 * an individual is being indexed, so the producer is not blocked by indexing.
 */
class IndexWorkerThread extends Thread{

    /** Guards {@link #count} and {@link #starttime}, which are shared by all workers. */
    private static final Object STATS_LOCK = new Object();

    private static final Log log = LogFactory.getLog(IndexWorkerThread.class);

    /** Number of individuals indexed by all workers together. */
    private static long count=0;

    /** Creation time (msec) of the first worker; base for the throughput log line. */
    private static long starttime = 0;

    protected IndividualToSolrDocument individualToSolrDoc;

    private final IndexerIface indexer;

    /** Pending work; every access is synchronized on the queue itself. */
    private final Queue<Individual> indQueue = new LinkedList<Individual>();

    private final int threadNum;

    /** True while the producer may still add work; written by the producer thread. */
    private volatile boolean distributing;

    /**
     * @param indexer      indexer every queued individual is passed to
     * @param threadNum    number used to identify this worker in log messages
     * @param distributing initial value of the "more work may arrive" flag
     */
    public IndexWorkerThread(IndexerIface indexer, int threadNum,boolean distributing){
        this.indexer = indexer;
        this.threadNum = threadNum;
        this.distributing = distributing;
        // starttime is static, so it needs a lock shared by all instances.
        synchronized(STATS_LOCK){
            if(starttime == 0) {
                starttime = System.currentTimeMillis();
            }
        }
    }

    /**
     * Queues an individual for indexing and wakes the worker if it is waiting.
     *
     * @param ind individual to index
     */
    public void addToQueue(Individual ind){
        synchronized(indQueue){
            indQueue.offer(ind);
            indQueue.notify();
        }
    }

    /**
     * @return true if no individual is currently queued
     */
    public boolean isQueueEmpty(){
        synchronized(indQueue){
            return indQueue.isEmpty();
        }
    }

    /**
     * Tells the worker whether more work may still arrive. Passing {@code false}
     * makes the worker finish the queued work and then exit.
     *
     * @param distributing false once the producer has queued everything
     */
    public void setDistributing(boolean distributing){
        synchronized(indQueue){
            this.distributing = distributing;
            // Wake a waiting worker so it notices the change immediately
            // instead of after its wait timeout.
            indQueue.notifyAll();
        }
    }

    /**
     * Waits for work and indexes it until distribution has ended and the queue is empty.
     */
    @Override
    public void run(){
        boolean interrupted = false;
        try{
            while(true){
                synchronized(indQueue){
                    while(indQueue.isEmpty() && this.distributing){
                        try{
                            log.debug("Worker number " + threadNum + " waiting on some work to be alloted.");
                            indQueue.wait(1000);
                        }catch(InterruptedException ie){
                            // Keep working, but remember the interrupt so it can be
                            // restored for whoever inspects this thread afterwards.
                            interrupted = true;
                            log.debug("Worker number " + threadNum + " woken up",ie);
                        }
                    }
                    if(indQueue.isEmpty()){
                        // Nothing queued and the producer is done: time to exit.
                        break;
                    }
                }
                log.debug("work found for Woker number " + threadNum);
                try{
                    addDocsToIndex();
                }catch(Throwable e){
                    // Best effort: one failing individual must not stop the worker.
                    log.error(e,e);
                }
            }
        }finally{
            if(interrupted){
                Thread.currentThread().interrupt();
            }
        }
        log.info("Worker number " + threadNum + " exiting.");
    }

    /**
     * Indexes queued individuals until the queue is empty. A {@code null} queue
     * entry ends the current pass without being passed to the indexer.
     *
     * @throws IndexingException if the indexer fails; the individual that caused the
     *         failure has already been removed from the queue
     */
    protected void addDocsToIndex() throws IndexingException{
        Individual ind;
        while((ind = pollQueue()) != null){
            indexer.index(ind);
            recordIndexed();
        }
    }

    /** Removes and returns the head of the queue, or null if the queue is empty. */
    private Individual pollQueue(){
        synchronized(indQueue){
            return indQueue.poll();
        }
    }

    /** Bumps the shared counter and logs throughput after every 100 individuals. */
    private static void recordIndexed(){
        synchronized(STATS_LOCK){
            count++;
            if( log.isInfoEnabled() ){
                if( (count % 100 ) == 0 && count > 0 ){
                    long dt = (System.currentTimeMillis() - starttime);
                    log.info("individuals indexed: " + count + " in " + dt + " msec " +
                            " time per individual = " + (dt / count) + " msec" );
                }
            }
        }
    }
}

View file

@ -38,7 +38,7 @@ public interface IndexerIface {
* @param newDoc - if true, just insert doc, if false attempt to update.
* @throws IndexingException
*/
public void index(Individual ind, boolean newDoc)throws IndexingException;
public void index(Individual ind)throws IndexingException;
/**

View file

@ -2,6 +2,8 @@
package edu.cornell.mannlib.vitro.webapp.search.lucene;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@ -21,6 +23,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
@ -31,6 +34,7 @@ import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
* be as full as possible.
*/
public class Entity2LuceneDoc implements Obj2DocIface{
/** These are the terms for the lucene index */
public static class VitroLuceneTermNames{
/** Id of entity, vclass or tab */
@ -61,6 +65,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{
/** text for 'full text' search, this is unstemmed for
* use with wildcards and prefix queries */
public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
/** class name for storing context nodes **/
public static final String CONTEXTNODE = "contextNode";
/** keywords */
public static final String KEYWORDS = "KEYWORDS";
/** Does the individual have a thumbnail image? 1=yes 0=no */
@ -108,6 +114,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{
private IndividualProhibitedFromSearch individualProhibited;
private static HashMap<String, String> IndividualURIToObjectProperties = new HashMap<String, String>();
private static HashSet<String> objectProperties = new HashSet<String>();
public Entity2LuceneDoc(
ProhibitedFromSearch classesProhibitedFromSearch,
IndividualProhibitedFromSearch individualProhibited){
@ -128,7 +138,6 @@ public class Entity2LuceneDoc implements Obj2DocIface{
Document doc = new Document();
String classPublicNames = "";
//DocId
String id = ent.getURI();
log.debug("translating " + id);
@ -162,7 +171,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
log.debug("not indexing " + id + " because of type " + clz.getURI());
return null;
}else{
if( !prohibited && classesProhibitedFromSearch.isClassProhibited(clz.getURI()) )
if( !prohibited && classesProhibitedFromSearch.isClassProhibitedFromSearch(clz.getURI()) )
prohibited = true;
if( clz.getSearchBoost() != null )
@ -196,13 +205,15 @@ public class Entity2LuceneDoc implements Obj2DocIface{
/* lucene DOCID */
doc.add( new Field(term.DOCID, entClassName + id,
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
//vitro Id
doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
//java class
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
// Individual label
if( ent.getRdfsLabel() != null )
value=ent.getRdfsLabel();
@ -212,7 +223,6 @@ public class Entity2LuceneDoc implements Obj2DocIface{
log.debug("Using local name for individual with rdfs:label " + ent.getURI());
value = ent.getLocalName();
}
Field nameRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
nameRaw.setBoost(NAME_BOOST);
doc.add(nameRaw);
@ -228,8 +238,22 @@ public class Entity2LuceneDoc implements Obj2DocIface{
Field nameStemmed = new Field(term.NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
nameStemmed.setBoost(NAME_BOOST);
doc.add(nameStemmed);
doc.add(nameStemmed);
String contextNodePropertyValues;
// if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
/*contextNodePropertyValues = searchQueryHandler.getPropertiesAssociatedWithEducationalTraining(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRole(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithPosition(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRelationship(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithAwardReceipt(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithInformationResource(ent.getURI()); */
// }
/* Field contextNodeInformation = new Field(term.CONTEXTNODE, contextNodePropertyValues, Field.Store.YES, Field.Index.ANALYZED );
doc.add(contextNodeInformation);*/
//Moniker
@ -279,18 +303,19 @@ public class Entity2LuceneDoc implements Obj2DocIface{
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
if( ! prohibited ){
//ALLTEXT, all of the 'full text'
String t=null;
value ="";
value+= " "+( ((t=ent.getName()) == null)?"":t );
value+= " "+( ((t=ent.getAnchor()) == null)?"":t);
value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t );
value+= " "+ ( ((t=ent.getDescription()) == null)?"":t );
value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t );
value+= " "+ getKeyterms(ent);
value ="";
value+= " "+( ((t=ent.getName()) == null)?"":t );
value+= " "+( ((t=ent.getAnchor()) == null)?"":t);
value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t );
value+= " "+ ( ((t=ent.getDescription()) == null)?"":t );
value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t );
value+= " "+ getKeyterms(ent);
value+= " " + classPublicNames;
value+= " " + classPublicNames;
List<DataPropertyStatement> dataPropertyStatements = ent.getDataPropertyStatements();
if (dataPropertyStatements != null) {
@ -310,17 +335,27 @@ public class Entity2LuceneDoc implements Obj2DocIface{
continue;
try {
value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t );
if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
//IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) );
objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ));
}
} catch (Exception e) {
log.debug("could not index name of related object: " + e.getMessage());
}
}
}
//stemmed terms
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
//unstemmed terms
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
}
// log.info("\n IndividualURItoObjectProperties " + IndividualURIToObjectProperties.toString() + " \n\n");
log.info(" \n Object Properties " + objectProperties.toString() + "\n\n");
return doc;
}

View file

@ -473,5 +473,11 @@ public class LuceneIndexer implements IndexerIface {
System.out.println("Could not clean up temp indexing dir " + currentOffLineDir);
}
}
}
}
@Override
public void index(Individual ind) throws IndexingException {
// TODO Auto-generated method stub
}
}

View file

@ -6,6 +6,7 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CONTEXTNODE;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_UNSTEMMED;
@ -38,7 +39,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl;
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
@ -111,10 +112,15 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
getAnalyzer());
context.setAttribute(ANALYZER, getAnalyzer());
//bk392 adding another argument to Entity2LuceneDoc
// that takes care of sparql queries for context nodes.
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
Entity2LuceneDoc translator = new Entity2LuceneDoc(
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
new IndividualProhibitedFromSearch(context) );
new IndividualProhibitedFromSearchImpl(context)
);
indexer.addObj2Doc(translator);
context.setAttribute(LuceneIndexer.class.getName(), indexer);
@ -250,9 +256,11 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
analyzer.addAnalyzer(NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(CONTEXTNODE, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer());
return analyzer;
}

View file

@ -1,7 +1,7 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.lucene;
package edu.cornell.mannlib.vitro.webapp.search.lucene;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
@ -27,91 +27,92 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
/**
* Setup objects for lucene searching and indexing.
*
* The indexing and search objects, IndexBuilder and Searcher are found by the
* controllers IndexController and SearchController through the servletContext.
* This object will have the method contextInitialized() called when the tomcat
* server starts this webapp.
*
* The contextInitialized() will try to find the lucene index directory,
* make a LuceneIndexer and a LuceneSearcher. The LuceneIndexer will
* also get a list of Obj2Doc objects so it can translate object to lucene docs.
*
* To execute this at context creation put this in web.xml:
<listener>
<listener-class>
edu.cornell.mannlib.vitro.search.setup.LuceneSetup
</listener-class>
</listener>
* @author bdc34
*
*/
public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
/**
* Setup objects for lucene searching and indexing.
*
* The indexing and search objects, IndexBuilder and Searcher are found by the
* controllers IndexController and SearchController through the servletContext.
* This object will have the method contextInitialized() called when the tomcat
* server starts this webapp.
*
* The contextInitialized() will try to find the lucene index directory,
* make a LuceneIndexer and a LuceneSearcher. The LuceneIndexer will
* also get a list of Obj2Doc objects so it can translate object to lucene docs.
*
* To execute this at context creation put this in web.xml:
<listener>
<listener-class>
edu.cornell.mannlib.vitro.search.setup.LuceneSetup
</listener-class>
</listener>
* @author bdc34
*
*/
public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
private static String indexDir = null;
private static final Log log = LogFactory.getLog(LuceneSetupCJK.class.getName());
private static final String PROPERTY_VITRO_HOME = "vitro.home.directory";
private static final Log log = LogFactory.getLog(LuceneSetupCJK.class.getName());
private static final String PROPERTY_VITRO_HOME = "vitro.home.directory";
private static final String LUCENE_SUBDIRECTORY_NAME = "luceneIndex";
/**
* Gets run to set up DataSource when the webapp servlet context gets created.
*/
/**
* Gets run to set up DataSource when the webapp servlet context gets created.
*/
@Override
@SuppressWarnings("unchecked")
public void contextInitialized(ServletContextEvent sce) {
ServletContext context = sce.getServletContext();
log.info("**** Running "+this.getClass().getName()+".contextInitialized()");
try{
indexDir = getIndexDirName(sce);
log.info("Lucene indexDir: " + indexDir);
setBoolMax();
HashSet dataPropertyBlacklist = new HashSet<String>();
context.setAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist);
HashSet objectPropertyBlacklist = new HashSet<String>();
objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom");
context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist);
@SuppressWarnings("unchecked")
public void contextInitialized(ServletContextEvent sce) {
ServletContext context = sce.getServletContext();
log.info("**** Running "+this.getClass().getName()+".contextInitialized()");
try{
indexDir = getIndexDirName(sce);
log.info("Lucene indexDir: " + indexDir);
setBoolMax();
HashSet dataPropertyBlacklist = new HashSet<String>();
context.setAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist);
HashSet objectPropertyBlacklist = new HashSet<String>();
objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom");
context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist);
//This is where to get a LucenIndex from. The indexer will
//need to reference this to notify it of updates to the index
LuceneIndexFactory lif = LuceneIndexFactory.setup(context, indexDir);
String liveIndexDir = lif.getLiveIndexDir(context);
//here we want to put the LuceneIndex object into the application scope
LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer());
//here we want to put the LuceneIndex object into the application scope
LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer());
context.setAttribute(LuceneSetup.ANALYZER, getAnalyzer());
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
Entity2LuceneDoc translator = new Entity2LuceneDoc(
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
new IndividualProhibitedFromSearch(context) );
new IndividualProhibitedFromSearchImpl(context)
);
indexer.addObj2Doc(translator);
indexer.setLuceneIndexFactory(lif);
//This is where the builder gets the list of places to try to
//get objects to index. It is filtered so that non-public text
//does not get into the search index.
WebappDaoFactory wadf =
(WebappDaoFactory) context.getAttribute("webappDaoFactory");
//This is where the builder gets the list of places to try to
//get objects to index. It is filtered so that non-public text
//does not get into the search index.
WebappDaoFactory wadf =
(WebappDaoFactory) context.getAttribute("webappDaoFactory");
VitroFilters vf = VitroFilterUtils.getPublicFilter(context);
wadf = new WebappDaoFactoryFiltering(wadf,vf);
List sources = new ArrayList();
sources.add(wadf.getIndividualDao());
IndexBuilder builder = new IndexBuilder(context,indexer,sources);
// here we add the IndexBuilder with the LuceneIndexer
// to the servlet context so we can access it later in the webapp.
wadf = new WebappDaoFactoryFiltering(wadf,vf);
List sources = new ArrayList();
sources.add(wadf.getIndividualDao());
IndexBuilder builder = new IndexBuilder(context,indexer,sources);
// here we add the IndexBuilder with the LuceneIndexer
// to the servlet context so we can access it later in the webapp.
context.setAttribute(IndexBuilder.class.getName(),builder);
//set up listeners so search index builder is notified of changes to model
@ -119,36 +120,36 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
OntModel jenaOntModel = (OntModel)sce.getServletContext().getAttribute("jenaOntModel");
SearchReindexingListener srl = new SearchReindexingListener( builder );
ModelContext.registerListenerForChanges(sce.getServletContext(), srl);
}catch(Exception ex){
log.error("Could not setup lucene full text search." , ex);
}
log.debug("**** End of "+this.getClass().getName()+".contextInitialized()");
}
/**
* Gets run when the webApp Context gets destroyed.
*/
}catch(Exception ex){
log.error("Could not setup lucene full text search." , ex);
}
log.debug("**** End of "+this.getClass().getName()+".contextInitialized()");
}
/**
* Gets run when the webApp Context gets destroyed.
*/
@Override
public void contextDestroyed(ServletContextEvent sce) {
log.info("**** Running "+this.getClass().getName()+".contextDestroyed()");
IndexBuilder builder = (IndexBuilder)sce.getServletContext().getAttribute(IndexBuilder.class.getName());
builder.stopIndexingThread();
}
/**
* In wild card searches the query is first broken into many boolean searches
* OR'ed together. So if there is a query that would match a lot of records
* we need a high max boolean limit for the lucene search.
*
* This sets some static method in the lucene library to achieve this.
*/
public static void setBoolMax() {
BooleanQuery.setMaxClauseCount(16384);
}
builder.stopIndexingThread();
}
/**
* In wild card searches the query is first broken into many boolean searches
* OR'ed together. So if there is a query that would match a lot of records
* we need a high max boolean limit for the lucene search.
*
* This sets some static method in the lucene library to achieve this.
*/
public static void setBoolMax() {
BooleanQuery.setMaxClauseCount(16384);
}
/**
* Gets the name of the directory to store the lucene index in. The
* {@link ConfigurationProperties} should have a property named
@ -190,14 +191,14 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
return luceneDir.getPath();
}
/**
* Gets the analyzer that will be used when building the indexing
* and when analyzing the incoming search terms.
*
* @return
*/
private Analyzer getAnalyzer() {
return new CJKAnalyzer();
}
}
/**
* Gets the analyzer that will be used when building the indexing
* and when analyzing the incoming search terms.
*
* @return
*/
private Analyzer getAnalyzer() {
return new CJKAnalyzer();
}
}

View file

@ -0,0 +1,347 @@
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.QuerySolutionMap;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.shared.Lock;
import com.hp.hpl.jena.ontology.OntModel;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
/**
 * {@link DocumentModifier} that adjusts Solr field and document boosts from
 * two link-based scores computed with SPARQL against the dataset:
 * <ul>
 * <li><b>beta</b> of an individual: {@code 1 + 100 * inLinks / totalInd}, where
 * {@code inLinks} is the number of distinct subjects that point at the
 * individual (see {@link #calculateBeta(String)});</li>
 * <li><b>phi</b> of an individual: an average of the beta values of its
 * adjacent nodes (see {@link #calculatePhi(StringBuffer)}).</li>
 * </ul>
 * Beta values are cached in {@link #betaMap} until {@link #clearMap()} is called.
 */
public class CalculateParameters implements DocumentModifier {

    private Dataset dataset;

    /**
     * Total number of individuals, used as the divisor when computing beta.
     * It starts at 1 and is overwritten by the {@link TotalInd} task that the
     * dataset constructor starts on a separate thread; it is volatile so the
     * indexing threads see that update.
     */
    public static volatile int totalInd=1;

    /** Cache of beta values keyed by individual URI. */
    protected Map<String,Float> betaMap = new Hashtable<String,Float>();

    private static final String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
        + " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
        + " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
        + " prefix core: <http://vivoweb.org/ontology/core#> "
        + " prefix foaf: <http://xmlns.com/foaf/0.1/> "
        + " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
        + " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
        + " prefix bibo: <http://purl.org/ontology/bibo/> ";

    /** Counts the distinct subjects that link to ?uri (bound per call). */
    private static final String betaQuery = prefix + " SELECT count(distinct ?inLinks) " +
        " WHERE { " +
        " ?uri rdf:type owl:Thing . " +
        " ?inLinks ?prop ?uri . " +
        " } ";

    /** Counts every individual typed as owl:Thing. */
    private static final String totalCountQuery = prefix + " SELECT count(distinct ?ind) " +
        " WHERE { " +
        " ?ind rdf:type owl:Thing . " +
        " } ";

    /**
     * Queries run by {@link #getAdjacentNodes(String)}, in order. The first
     * finds co-authors of a person (and their labels); the second finds
     * organizations, persons, information resources and locations two hops
     * away from an agent.
     */
    private static final String[] adjacentNodeQueries = {
        prefix +
        " SELECT ?adjobj (str(?adjobjLabel) as ?coauthor) " +
        " WHERE { " +
        " ?uri rdf:type <http://xmlns.com/foaf/0.1/Person> . " +
        " ?uri ?prop ?obj . " +
        " ?obj rdf:type <http://vivoweb.org/ontology/core#Relationship> . " +
        " ?obj ?prop2 ?obj2 . " +
        " ?obj2 rdf:type <http://vivoweb.org/ontology/core#InformationResource> . " +
        " ?obj2 ?prop3 ?obj3 . " +
        " ?obj3 rdf:type <http://vivoweb.org/ontology/core#Relationship> . " +
        " ?obj3 ?prop4 ?adjobj . " +
        " ?adjobj rdfs:label ?adjobjLabel . " +
        " ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Person> . " +
        " FILTER (?prop !=rdf:type) . " +
        " FILTER (?prop2!=rdf:type) . " +
        " FILTER (?prop3!=rdf:type) . " +
        " FILTER (?prop4!=rdf:type) . " +
        " FILTER (?adjobj != ?uri) . " +
        "}",

        prefix +
        " SELECT ?adjobj " +
        " WHERE{ " +
        " ?uri rdf:type foaf:Agent . " +
        " ?uri ?prop ?obj . " +
        " ?obj ?prop2 ?adjobj . " +
        " FILTER (?prop !=rdf:type) . " +
        " FILTER isURI(?obj) . " +
        " FILTER (?prop2!=rdf:type) . " +
        " FILTER (?adjobj != ?uri) . " +
        " FILTER isURI(?adjobj) . " +
        " { ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Organization> . } " +
        " UNION " +
        " { ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Person> . } " +
        " UNION " +
        " { ?adjobj rdf:type <http://vivoweb.org/ontology/core#InformationResource> . } " +
        " UNION " +
        " { ?adjobj rdf:type <http://vivoweb.org/ontology/core#Location> . } ." +
        "}"
    };

    private static final Log log = LogFactory.getLog(CalculateParameters.class);

    /** Fields whose boost is set to beta + phi + NAME_BOOST. */
    private static final String[] fieldsToAddBetaTo = {
        VitroTermNames.NAME_RAW,
        VitroTermNames.NAME_LOWERCASE,
        VitroTermNames.NAME_UNSTEMMED,
        VitroTermNames.NAME_STEMMED
    };

    /** Fields that receive the adjacency text with boost beta * phi * ALL_TEXT_BOOST. */
    private static final String[] fieldsToMultiplyBetaBy = {
        VitroTermNames.ALLTEXT,
        VitroTermNames.ALLTEXTUNSTEMMED,
    };

    /**
     * Creates a modifier backed by the given dataset and starts a background
     * thread that counts all individuals and stores the result in
     * {@link #totalInd}.
     *
     * @param dataset dataset queried for link counts and adjacent nodes
     */
    public CalculateParameters(Dataset dataset){
        this.dataset =dataset;
        new Thread(new TotalInd(this.dataset,totalCountQuery)).start();
    }

    /**
     * Creates an instance without a dataset. The query methods dereference
     * the dataset, so they cannot be used on an instance built this way.
     */
    public CalculateParameters(){
        super();
    }

    /**
     * Computes beta for one individual by counting the distinct subjects that
     * link to it. A failed query is logged and treated as zero in-links, which
     * yields the neutral value 1.
     *
     * @param uri URI of the individual
     * @return {@code 1 + 100 * inLinks / totalInd}, or 1 when the total count
     *         is not positive
     */
    public float calculateBeta(String uri){
        int conn = 0;
        QuerySolutionMap initialBinding = new QuerySolutionMap();
        initialBinding.add("uri", ResourceFactory.createResource(uri));

        dataset.getLock().enterCriticalSection(Lock.READ);
        try{
            Query query = QueryFactory.create(betaQuery,Syntax.syntaxARQ);
            QueryExecution qexec = QueryExecutionFactory.create(query,dataset,initialBinding);
            try{
                ResultSet results = qexec.execSelect();
                List<String> resultVars = results.getResultVars();
                if(resultVars!=null && !resultVars.isEmpty() && results.hasNext()){
                    QuerySolution soln = results.next();
                    conn = Integer.parseInt(soln.getLiteral(resultVars.get(0)).getLexicalForm());
                }
            }finally{
                // release the execution even when reading the result fails
                qexec.close();
            }
        }catch(Throwable t){
            // best effort: a failed count must not abort indexing
            log.error(t,t);
        }finally{
            dataset.getLock().leaveCriticalSection();
        }

        // read the shared counter once; a non-positive total would turn the
        // division into NaN/Infinity and poison the boost
        int total = totalInd;
        if(total <= 0){
            return 1f;
        }
        float beta = (float)conn/total;
        beta *= 100;
        beta += 1;
        return beta;
    }

    /**
     * Computes phi from a space-separated list of adjacent node URIs.
     *
     * @param adjNodes URIs separated by single spaces
     * @return (0.1 + sum of the nodes' beta values) / number of nodes, or 1
     *         when the list contains no tokens
     */
    public float calculatePhi(StringBuffer adjNodes){
        StringTokenizer nodes = new StringTokenizer(adjNodes.toString()," ");
        int size=0;
        float phi = 0.1F;
        while(nodes.hasMoreTokens()){
            size++;
            phi += getBeta(nodes.nextToken());
        }
        return size>0 ? phi/size : 1;
    }

    /**
     * Returns the cached beta for the URI, computing and caching it on first use.
     *
     * @param uri URI of the individual
     * @return beta value for the individual
     */
    public synchronized Float getBeta(String uri){
        Float cached = betaMap.get(uri);
        if(cached != null){
            return cached;
        }
        float beta = calculateBeta(uri);
        betaMap.put(uri, beta);
        return beta;
    }

    /**
     * Collects the nodes adjacent to an individual and the names of its
     * co-authors by running {@link #adjacentNodeQueries}.
     *
     * @param uri URI of the individual
     * @return a two-element array: index 0 holds the adjacent node URIs, each
     *         followed by a space; index 1 holds the co-author labels, each
     *         wrapped in " co-authors " markers. Both are empty strings when
     *         nothing is found or the lookup fails.
     */
    public String[] getAdjacentNodes(String uri){
        Set<String> adjacentNodes = new HashSet<String>();
        Set<String> coauthorNames = new HashSet<String>();
        String[] info = new String[]{"",""};

        QuerySolutionMap initialBinding = new QuerySolutionMap();
        initialBinding.add("uri", ResourceFactory.createResource(uri));

        dataset.getLock().enterCriticalSection(Lock.READ);
        try{
            for(String queryString : adjacentNodeQueries){
                Query query = QueryFactory.create(queryString,Syntax.syntaxARQ);
                QueryExecution qexec = QueryExecutionFactory.create(query,dataset,initialBinding);
                try{
                    ResultSet results = qexec.execSelect();
                    while(results.hasNext()){
                        QuerySolution soln = results.nextSolution();

                        // only named resources have a URI worth scoring; blank
                        // nodes and literals are skipped instead of adding null
                        RDFNode adjacent = soln.get("adjobj");
                        if(adjacent!=null && adjacent.isURIResource()){
                            adjacentNodes.add(((Resource)adjacent).getURI());
                        }

                        RDFNode coauthor = soln.get("coauthor");
                        if(coauthor!=null){
                            coauthorNames.add(" co-authors " + coauthor.toString() + " co-authors ");
                        }
                    }
                }catch(Exception e){
                    log.error("Error running adjacent node query in getAdjacentNodes for " + uri, e);
                }finally{
                    qexec.close();
                }
            }

            StringBuffer adjacentNodesConcat = new StringBuffer();
            for(String node : adjacentNodes){
                adjacentNodesConcat.append(node).append(" ");
            }
            info[0] = adjacentNodesConcat.toString();

            StringBuffer coauthorBuff = new StringBuffer();
            for(String name : coauthorNames){
                coauthorBuff.append(name);
            }
            info[1] = coauthorBuff.toString();
        }catch(Throwable t){
            // best effort: fall back to whatever info already holds
            log.error(t,t);
        }finally{
            dataset.getLock().leaveCriticalSection();
        }
        return info;
    }

    /**
     * Applies the beta/phi based boosts to the document of one individual.
     * Name fields get the boost beta + phi + NAME_BOOST; all-text fields get
     * the adjacency text added with boost beta * phi * ALL_TEXT_BOOST; the
     * target-info field gets the co-author names; the document boost is set to
     * beta * phi * ALL_TEXT_BOOST. Fields missing from the document are skipped.
     *
     * @param individual individual being indexed
     * @param doc Solr document to modify
     * @param addUri extra text appended to the adjacent node URIs before phi
     *        is calculated and before the text is added to the all-text fields
     */
    @Override
    public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri) {
        log.debug("Parameter calculation starts..");

        String uri = individual.getURI();
        String adjInfo[] = getAdjacentNodes(uri);

        StringBuffer info = new StringBuffer();
        info.append(adjInfo[0]);
        if(addUri != null){
            info.append(addUri.toString());
        }

        float phi = calculatePhi(info);
        // cached after the first call, so read it once for all boosts below
        float beta = getBeta(uri);

        for(String term: fieldsToAddBetaTo){
            SolrInputField f = doc.getField( term );
            if(f == null){
                log.debug("Field " + term + " is missing from the document of " + uri);
                continue;
            }
            f.setBoost( beta + phi + IndividualToSolrDocument.NAME_BOOST);
        }

        for(String term: fieldsToMultiplyBetaBy){
            SolrInputField f = doc.getField( term );
            if(f == null){
                log.debug("Field " + term + " is missing from the document of " + uri);
                continue;
            }
            f.addValue(info.toString(),beta*phi*IndividualToSolrDocument.ALL_TEXT_BOOST);
        }

        SolrInputField f = doc.getField(VitroTermNames.targetInfo);
        if(f != null){
            f.addValue(adjInfo[1],f.getBoost());
        }else{
            log.debug("Field " + VitroTermNames.targetInfo + " is missing from the document of " + uri);
        }

        doc.setDocumentBoost(beta*phi*IndividualToSolrDocument.ALL_TEXT_BOOST);

        log.debug("Parameter calculation is done");
    }

    /** Empties the beta cache so values are recomputed on next use. */
    public void clearMap(){
        betaMap.clear();
    }
}
/**
 * Background task that counts the individuals in the dataset and publishes the
 * result to {@link CalculateParameters#totalInd}, where it is used as the
 * divisor for beta. A count that is not positive is not published, because a
 * zero divisor would turn every beta into NaN or Infinity.
 */
class TotalInd implements Runnable{

    private static final Log log = LogFactory.getLog(TotalInd.class);

    private final Dataset dataset;
    private final String totalCountQuery;

    /**
     * @param dataset dataset to count individuals in
     * @param totalCountQuery SPARQL query whose first result variable holds the count
     */
    public TotalInd(Dataset dataset,String totalCountQuery){
        this.dataset = dataset;
        this.totalCountQuery = totalCountQuery;
    }

    /**
     * Runs the count query under a read lock and stores a positive result in
     * {@code CalculateParameters.totalInd}. Any failure is logged and leaves
     * the previous value in place.
     */
    public void run(){
        dataset.getLock().enterCriticalSection(Lock.READ);
        try{
            int count = 0;
            Query query = QueryFactory.create(totalCountQuery,Syntax.syntaxARQ);
            QueryExecution qexec = QueryExecutionFactory.create(query,dataset);
            try{
                ResultSet results = qexec.execSelect();
                List<String> resultVars = results.getResultVars();
                if(resultVars!=null && !resultVars.isEmpty() && results.hasNext()){
                    QuerySolution soln = results.next();
                    count = Integer.parseInt(soln.getLiteral(resultVars.get(0)).getLexicalForm());
                }
            }finally{
                // release the execution even when reading the result fails
                qexec.close();
            }

            if(count > 0){
                CalculateParameters.totalInd = count;
            }else{
                log.warn("Individual count query returned " + count
                        + "; keeping the previous total of " + CalculateParameters.totalInd);
            }
            log.info("Total number of individuals in the system are : " + CalculateParameters.totalInd);
        }catch(Throwable t){
            log.error(t,t);
        }finally{
            dataset.getLock().leaveCriticalSection();
        }
    }
}

View file

@ -0,0 +1,452 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.QuerySolutionMap;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.shared.Lock;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
public class ContextNodeFields implements DocumentModifier{
private static final String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
+ " prefix core: <http://vivoweb.org/ontology/core#> "
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
private static final List<String> singleValuedQueriesForAgent = new ArrayList<String>();
private static final List<String> singleValuedQueriesForInformationResource = new ArrayList<String>();
private static final List<String> multiValuedQueriesForAgent = new ArrayList<String>();
private static final String multiValuedQueryForInformationResource;
// private static StringBuffer objectProperties = new StringBuffer();
private Log log = LogFactory.getLog(ContextNodeFields.class);
private Dataset dataset;
/**
 * Creates a modifier that runs its SPARQL queries against the given dataset.
 *
 * @param dataset dataset stored for use by {@code runQuery}
 */
public ContextNodeFields(Dataset dataset){
this.dataset = dataset;
}
/* TODO: consider a constructor like this:
* public ContextNodeFields(OntModel fullModel, List<String> queries )
*/
/*
*TODO:
* consider reducing the code in this class using a method like the following:
*/
/**
 * Runs one SELECT query with {@code ?uri} bound to the individual's URI and
 * concatenates every non-null value of every result row.
 *
 * The query is parsed before the read lock is taken, so a malformed query
 * propagates to the caller; failures while executing or reading the results
 * are logged and whatever was collected so far is returned.
 *
 * @param individual individual whose URI is bound to {@code ?uri}
 * @param query SPARQL SELECT query text
 * @return the collected values, each preceded by a single space
 */
public StringBuffer runQuery( Individual individual, String query ){
    QuerySolutionMap bindings = new QuerySolutionMap();
    bindings.add("uri", ResourceFactory.createResource(individual.getURI()));
    Query parsedQuery = QueryFactory.create( query, Syntax.syntaxARQ);

    StringBuffer collected = new StringBuffer();
    dataset.getLock().enterCriticalSection(Lock.READ);
    try{
        QueryExecution execution = QueryExecutionFactory.create(parsedQuery, dataset, bindings);
        try{
            for(ResultSet rows = execution.execSelect(); rows.hasNext(); ){
                QuerySolution row = rows.nextSolution();
                for(Iterator<String> varNames = row.varNames(); varNames.hasNext(); ){
                    String varName = varNames.next();
                    RDFNode value = row.get( varName );
                    if( value == null ){
                        log.debug(varName + " is null");
                        continue;
                    }
                    collected.append(" " + value.toString());
                }
            }
        }catch(Throwable t){
            log.error(t,t);
        } finally{
            execution.close();
        }
    }finally{
        dataset.getLock().leaveCriticalSection();
    }
    return collected;
}
/**
 * Adds context node values to the document of one individual.
 *
 * Every query in {@code multiValuedQueriesForAgent} is run on its own thread
 * and the combined results are added to the all-text field; the result of
 * {@code multiValuedQueryForInformationResource} is added to the target-info
 * field. Values are added with the field's current boost. A field that is
 * missing from the document is skipped.
 *
 * If the calling thread is interrupted while waiting for a query thread, that
 * thread's result is dropped, the remaining threads are still collected, and
 * the interrupt status is restored before returning.
 *
 * @param individual individual being indexed
 * @param doc Solr document to modify
 * @param addUri not used by this modifier
 */
@Override
public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri) {
    log.debug("retrieving context node values..");

    SolrInputField field = doc.getField(VitroTermNames.ALLTEXT);
    SolrInputField targetField = doc.getField(VitroTermNames.targetInfo);

    StringBuffer objectProperties = new StringBuffer();
    objectProperties.append(" ");

    int threadCount = multiValuedQueriesForAgent.size();
    QueryRunner[] threads = new QueryRunner[threadCount];

    //Make a thread for each query and start it.
    for(int i= 0; i < threadCount; i++){
        QueryRunner t = new QueryRunner(individual, multiValuedQueriesForAgent.get(i));
        t.start();
        threads[i] = t;
    }

    //Wait for each thread to finish and collect results
    boolean interrupted = false;
    for(int i = 0 ; i < threadCount ; i++){
        try {
            threads[i].join();
            objectProperties.append( threads[i].getPropertyValues() ) ;
        } catch (InterruptedException e) {
            // remember the interrupt; re-asserting it here would make every
            // later join() fail immediately
            interrupted = true;
            log.error("Thread " + threads[i].getName() + " interrupted!");
        }
        threads[i] = null;
    }

    if(targetField != null){
        targetField.addValue(" " + runQuery(individual, multiValuedQueryForInformationResource), targetField.getBoost());
    }else{
        log.debug("Field " + VitroTermNames.targetInfo + " is missing from the document");
    }

    if(field != null){
        // hand Solr a String, not the StringBuffer object itself
        field.addValue(objectProperties.toString(), field.getBoost());
    }else{
        log.debug("Field " + VitroTermNames.ALLTEXT + " is missing from the document");
    }

    if(interrupted){
        Thread.currentThread().interrupt();
    }

    log.debug("context node values are retrieved");
}
// Single valued queries for foaf:Agent.
// Each query starts from ?uri typed as foaf:Agent, follows any property to a
// context node ?c of one type (core:Position, core:Relationship,
// core:AwardReceipt, core:Role or core:EducationalTraining) and selects one
// property reached from that node as ?contextNodeProperty.
// NOTE(review): this list is populated here but is not read anywhere in this
// class; modifyDocument uses multiValuedQueriesForAgent instead.
static {
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:hrJobTitle ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:involvedOrganizationName ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:positionForPerson ?f . ?f rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:positionInOrganization ?i . ?i rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:titleOrRole ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Relationship . " +
" ?c core:advisee ?d . ?d rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Relationship . " +
" ?c core:degreeCandidacy ?e . ?e rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Relationship . " +
" ?c core:linkedAuthor ?f . ?f rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Relationship . " +
" ?c core:linkedInformationResource ?h . ?h rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:AwardReceipt . " +
" ?c core:awardConferredBy ?d . ?d rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:AwardReceipt . " +
" ?c core:awardOrHonorFor ?e . ?e rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:AwardReceipt . " +
" ?c core:description ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT DISTINCT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:Role ; core:roleIn ?Organization ."
+ " ?Organization rdfs:label ?ContextNodeProperty . "
+ " } ORDER BY ?ContextNodeProperty ");
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:degreeEarned ?d . ?d rdfs:label ?ContextNodeProperty ."
+ " }");
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:degreeEarned ?d . ?d core:abbreviation ?ContextNodeProperty ."
+ " }");
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:majorField ?ContextNodeProperty ."
+ " }");
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:departmentOrSchool ?ContextNodeProperty ."
+ " }");
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:trainingAtOrganization ?e . ?e rdfs:label ?ContextNodeProperty . "
+ " }");
}
// Single valued queries for core:InformationResource.
// Each query starts from ?uri typed as core:InformationResource and selects
// one label reached from it (authors, linked resources, features, editors,
// subject areas and their research areas) as ?contextNodeProperty.
// NOTE(review): this list is populated here but is not read anywhere in this
// class; modifyDocument uses multiValuedQueryForInformationResource instead.
static {
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ " ?uri rdf:type core:InformationResource . "
+ "?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ."
+ "?b rdfs:label ?ContextNodeProperty .}");
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ " ?uri rdf:type core:InformationResource . "
+ " ?uri core:linkedInformationResource ?d ."
+ " ?d rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type core:InformationResource . "
+ "?uri core:features ?i . ?i rdfs:label ?ContextNodeProperty ."
+ "}");
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type core:InformationResource . "
+ "?uri bibo:editor ?e . ?e rdfs:label ?ContextNodeProperty ."
+ "}");
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type core:InformationResource . "
+ "?uri core:hasSubjectArea ?f . ?f rdfs:label ?ContextNodeProperty ."
+ "}");
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type core:InformationResource . "
+ "?uri core:hasSubjectArea ?f . ?f core:researchAreaOf ?h . ?h rdfs:label ?ContextNodeProperty ."
+ "}");
}
// Multi valued queries for foaf:Agent.
// One query per context node type (core:Position, core:Relationship,
// core:AwardReceipt, core:Role, core:EducationalTraining). Each selects
// several properties of the context node in a single pass; all but the
// core:Role query wrap every property in OPTIONAL so a missing one does not
// drop the row. modifyDocument runs each of these queries on its own
// QueryRunner thread and adds the combined values to the all-text field.
static{
multiValuedQueriesForAgent.add(prefix +
"SELECT " +
"(str(?HRJobTitle) as ?hrJobTitle) (str(?InvolvedOrganizationName) as ?involvedOrganizationName) " +
" (str(?PositionForPerson) as ?positionForPerson) (str(?PositionInOrganization) as ?positionInOrganization) " +
" (str(?TitleOrRole) as ?titleOrRole) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:Position . "
+ " OPTIONAL { ?c core:hrJobTitle ?HRJobTitle . } . "
+ " OPTIONAL { ?c core:involvedOrganizationName ?InvolvedOrganizationName . } ."
+ " OPTIONAL { ?c core:positionForPerson ?f . ?f rdfs:label ?PositionForPerson . } . "
+ " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . "
+ " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . "
+ " }");
multiValuedQueriesForAgent.add(prefix +
"SELECT (str(?Advisee) as ?advisee) (str(?DegreeCandidacy) as ?degreeCandidacy) " +
" (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:Relationship . "
+ " OPTIONAL { ?c core:advisee ?d . ?d rdfs:label ?Advisee . } . "
+ " OPTIONAL { ?c core:degreeCandidacy ?e . ?e rdfs:label ?DegreeCandidacy . } ."
+ " OPTIONAL { ?c core:linkedAuthor ?f . ?f rdfs:label ?LinkedAuthor . } . "
+ " OPTIONAL { ?c core:linkedInformationResource ?h . ?h rdfs:label ?LinkedInformationResource . } . "
+ " } ");
multiValuedQueriesForAgent.add(prefix +
"SELECT (str(?AwardConferredBy) as ?awardConferredBy) (str(?AwardOrHonorFor) as ?awardOrHonorFor) " +
" (str(?Description) as ?description) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:AwardReceipt . "
+ " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy } . "
+ " OPTIONAL { ?c core:awardOrHonorFor ?e . ?e rdfs:label ?AwardOrHonorFor } ."
+ " OPTIONAL { ?c core:description ?Description . } . "
+ " }");
multiValuedQueriesForAgent.add(prefix +
"SELECT (str(?OrganizationLabel) as ?organizationLabel) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:Role ; core:roleIn ?Organization ."
+ " ?Organization rdfs:label ?OrganizationLabel . "
+ " }");
multiValuedQueriesForAgent.add(prefix +
"SELECT (str(?AcademicDegreeLabel) as ?academicDegreeLabel) (str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) "
+ "(str(?MajorField) as ?majorField) (str(?DepartmentOrSchool) as ?departmentOrSchool) " +
"(str(?TrainingAtOrganizationLabel) as ?trainingAtOrganizationLabel) WHERE {"
+ " ?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ "OPTIONAL { ?c core:degreeEarned ?d . ?d rdfs:label ?AcademicDegreeLabel ; core:abbreviation ?AcademicDegreeAbbreviation . } . "
+ "OPTIONAL { ?c core:majorField ?MajorField .} ."
+ " OPTIONAL { ?c core:departmentOrSchool ?DepartmentOrSchool . }"
+ " OPTIONAL { ?c core:trainingAtOrganization ?e . ?e rdfs:label ?TrainingAtOrganizationLabel . } . "
+"}");
}
// Multi valued query for core:InformationResource.
// Selects, in one query with an OPTIONAL group per item, the labels of the
// linked authors and linked information resources (through the authorship
// node), editors, subject areas with their research areas, and features.
// modifyDocument runs this query on the calling thread and adds the result to
// the target-info field.
static {
multiValuedQueryForInformationResource = prefix +
"SELECT (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) "
+ "(str(?Editor) as ?editor) (str(?SubjectArea) as ?subjectArea) (str(?ResearchAreaOf) as ?researchAreaOf) " +
"(str(?Features) as ?features) WHERE {"
+ " ?uri rdf:type core:InformationResource . "
+ "OPTIONAL { ?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ; core:linkedInformationResource ?d ." +
"?b rdfs:label ?LinkedAuthor . ?d rdfs:label ?LinkedInformationResource } . "
+ "OPTIONAL { ?uri bibo:editor ?e . ?e rdfs:label ?Editor . } ."
+ " OPTIONAL { ?uri core:hasSubjectArea ?f . ?f rdfs:label ?SubjectArea ; core:researchAreaOf ?h . ?h rdfs:label ?ResearchAreaOf . } "
+ " OPTIONAL { ?uri core:features ?i . ?i rdfs:label ?Features . } . "
+"}" ;
}
/**
 * Thread that runs a single query for a single individual through the
 * enclosing instance's {@code runQuery} and keeps the text it produced.
 * Read the result with {@link #getPropertyValues()} after joining the thread.
 */
private class QueryRunner extends Thread{

    private Individual ind;
    private String query;

    /** Text collected by {@link #run()}; empty until the query has finished. */
    private StringBuffer propertyValues = new StringBuffer();

    /**
     * @param ind individual the query is run for
     * @param query SPARQL query text
     */
    public QueryRunner(Individual ind, String query){
        this.query = query;
        this.ind = ind;
    }

    /** @return the text collected so far, as a String */
    public String getPropertyValues(){
        return propertyValues.toString();
    }

    /** Runs the query and appends its output to the collected text. */
    public void run(){
        StringBuffer queryOutput = runQuery(ind, query);
        propertyValues.append(queryOutput);
    }
}
}

View file

@ -0,0 +1,15 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import org.apache.solr.common.SolrInputDocument;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
/**
 * Implemented by objects that add to, or otherwise adjust, the
 * SolrInputDocument built for an individual.
 */
public interface DocumentModifier {

    /**
     * Modifies the Solr document of one individual in place.
     *
     * @param individual the individual the document was built for
     * @param doc the document to modify
     * @param addUri additional text handed to the modifier.
     *        NOTE(review): CalculateParameters appends it to a space-separated
     *        list of URIs, so it presumably carries URIs; confirm with the caller.
     */
    void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri);
}

View file

@ -2,46 +2,330 @@
package edu.cornell.mannlib.vitro.webapp.search.solr;
import org.apache.solr.common.SolrDocument;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.document.Document;
import org.apache.solr.common.SolrInputDocument;
import org.joda.time.DateTime;
import com.hp.hpl.jena.vocabulary.OWL;
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
import edu.cornell.mannlib.vitro.webapp.search.beans.ClassProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
public class IndividualToSolrDocument implements Obj2DocIface {
public class IndividualToSolrDocument {
public static final Log log = LogFactory.getLog(IndividualToSolrDocument.class.getName());
protected LuceneDocToSolrDoc luceneToSolr;
protected Entity2LuceneDoc entityToLucene;
public static VitroTermNames term = new VitroTermNames();
public IndividualToSolrDocument(Entity2LuceneDoc e2d){
entityToLucene = e2d;
luceneToSolr = new LuceneDocToSolrDoc();
private static String entClassName = Individual.class.getName();
private ClassProhibitedFromSearch classesProhibitedFromSearch;
private IndividualProhibitedFromSearch individualProhibitedFromSearch;
public List<DocumentModifier> documentModifiers = new ArrayList<DocumentModifier>();
private static List<String> contextNodeClassNames = new ArrayList<String>();
/**
 * Creates a translator that runs no document modifiers.
 *
 * @param classesProhibitedFromSearch consulted for each class of an individual to
 *        decide whether the individual is flagged as prohibited from text results
 * @param individualProhibitedFromSearch consulted with the individual's URI;
 *        prohibited individuals are not translated at all
 */
public IndividualToSolrDocument(
        ClassProhibitedFromSearch classesProhibitedFromSearch,
        IndividualProhibitedFromSearch individualProhibitedFromSearch){
    // Typed emptyList() instead of the raw EMPTY_LIST constant: same shared
    // immutable instance, but without the unchecked conversion.
    this( classesProhibitedFromSearch,
          individualProhibitedFromSearch,
          Collections.<DocumentModifier>emptyList());
}
@Override
public boolean canTranslate(Object obj) {
return obj != null && obj instanceof Individual;
/**
 * Creates a translator that runs the given document modifiers on every
 * document it builds.
 *
 * @param classesProhibitedFromSearch consulted for each class of an individual to
 *        decide whether the individual is flagged as prohibited from text results
 * @param individualProhibitedFromSearch consulted with the individual's URI;
 *        prohibited individuals are not translated at all
 * @param docModifiers modifiers to run, in list order; {@code null} is treated
 *        as "no modifiers"
 */
public IndividualToSolrDocument(
        ClassProhibitedFromSearch classesProhibitedFromSearch,
        IndividualProhibitedFromSearch individualProhibitedFromSearch,
        List<DocumentModifier> docModifiers){
    this.classesProhibitedFromSearch = classesProhibitedFromSearch;
    this.individualProhibitedFromSearch = individualProhibitedFromSearch;
    // documentModifiers is a public field that other classes read without a
    // null check, so never leave it null. translate() already handles null
    // and empty identically, so this does not change what gets indexed.
    this.documentModifiers = (docModifiers != null)
            ? docModifiers
            : Collections.<DocumentModifier>emptyList();
    fillContextNodes();
}
@SuppressWarnings("static-access")
public SolrInputDocument translate(Individual ind) throws IndexingException{
long tProhibited = System.currentTimeMillis();
ArrayList<String> superClassNames = null;
StringBuffer addUri = null;
String value;
StringBuffer classPublicNames = new StringBuffer();
classPublicNames.append("");
SolrInputDocument doc = new SolrInputDocument();
//DocId
String id = ind.getURI();
log.debug("translating " + id);
if(id == null){
log.debug("cannot add individuals without URIs to lucene Index");
return null;
}else if( id.startsWith(VitroVocabulary.vitroURI) ||
id.startsWith(VitroVocabulary.VITRO_PUBLIC) ||
id.startsWith(VitroVocabulary.PSEUDO_BNODE_NS) ||
id.startsWith(OWL.NS)){
log.debug("not indexing because of namespace:" + id);
return null;
}
//filter out class groups, owl:ObjectProperties etc..
if(individualProhibitedFromSearch.isIndividualProhibited(id)){
return null;
}
log.debug("time to check if individual is prohibited:" + Long.toString(System.currentTimeMillis() - tProhibited));
// Types and classgroups
boolean prohibited = false;
List<VClass> vclasses = ind.getVClasses(false);
superClassNames = new ArrayList<String>();
String superLclName = null;
long tClassgroup = System.currentTimeMillis();
for(VClass clz : vclasses){
superLclName = clz.getLocalName();
superClassNames.add(superLclName);
if(clz.getURI() == null){
continue;
}else if(OWL.Thing.getURI().equals(clz.getURI())){
//index individuals of type owl:Thing, just don't add owl:Thing as the type field in the index
continue;
} else if(clz.getURI().startsWith(OWL.NS)){
log.debug("not indexing " + id + " because of type " + clz.getURI());
return null;
} else if(contextNodeClassNames.contains(superLclName)) { // check to see if context node is being indexed.
return null;
}
else {
if( !prohibited && classesProhibitedFromSearch.isClassProhibitedFromSearch(clz.getURI()))
prohibited = true;
if( clz.getSearchBoost() != null)
doc.setDocumentBoost(doc.getDocumentBoost() + clz.getSearchBoost());
doc.addField(term.RDFTYPE, clz.getURI());
if(clz.getLocalName() != null){
doc.addField(term.CLASSLOCALNAME, clz.getLocalName());
doc.addField(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase());
}
if(clz.getName() != null){
classPublicNames.append(" ");
classPublicNames.append(clz.getName());
}
//Classgroup URI
if(clz.getGroupURI() != null){
doc.addField(term.CLASSGROUP_URI,clz.getGroupURI());
}
}
}
if(superClassNames.isEmpty()){
return null;
}
log.debug("time to check if class is prohibited and adding classes, classgroups and type to the index: " + Long.toString(System.currentTimeMillis() - tClassgroup));
@Override
public boolean canUnTranslate(Object result) {
return result != null && result instanceof SolrDocument;
doc.addField(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0");
//lucene DocID
doc.addField(term.DOCID, entClassName + id);
//vitro id
doc.addField(term.URI, id);
//java class
doc.addField(term.JCLASS, entClassName);
//Individual Label
if(ind.getRdfsLabel() != null)
value = ind.getRdfsLabel();
else{
log.debug("Using local name for individual with rdfs:label " + ind.getURI());
value = ind.getLocalName();
}
// collecting object property statements
String uri = ind.getURI();
StringBuffer objectNames = new StringBuffer();
objectNames.append("");
String t=null;
addUri = new StringBuffer();
addUri.append("");
List<ObjectPropertyStatement> objectPropertyStatements = ind.getObjectPropertyStatements();
if (objectPropertyStatements != null) {
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
while (objectPropertyStmtIter.hasNext()) {
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
continue;
try {
objectNames.append(" ");
objectNames.append(((t=objectPropertyStmt.getObject().getName()) == null)?"":t);
addUri.append(" ");
addUri.append(((t=objectPropertyStmt.getObject().getURI()) == null)?"":t);
} catch (Exception e) {
log.debug("could not index name of related object: " + e.getMessage());
}
}
}
if(documentModifiers == null || documentModifiers.isEmpty()){
doc.addField(term.NAME_RAW, value, NAME_BOOST);
doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST);
doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST);
doc.addField(term.NAME_STEMMED, value, NAME_BOOST);
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
}else{
doc.addField(term.NAME_RAW, value);
doc.addField(term.NAME_LOWERCASE, value.toLowerCase());
doc.addField(term.NAME_UNSTEMMED, value);
doc.addField(term.NAME_STEMMED, value);
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
}
long tMoniker = System.currentTimeMillis();
if(documentModifiers == null || documentModifiers.isEmpty()){
//boost for entity
if(ind.getSearchBoost() != null && ind.getSearchBoost() != 0)
doc.setDocumentBoost(ind.getSearchBoost());
}
//thumbnail
try{
value = null;
if(ind.hasThumb())
doc.addField(term.THUMBNAIL, "1");
else
doc.addField(term.THUMBNAIL, "0");
}catch(Exception ex){
log.debug("could not index thumbnail: " + ex);
}
//time of index in millis past epoch
Object anon[] = { new Long((new DateTime() ).getMillis()) };
doc.addField(term.INDEXEDTIME, String.format("%019d", anon));
log.debug("time to include thumbnail and indexedtime in the index: " + Long.toString(System.currentTimeMillis() - tMoniker));
long tPropertyStatements = System.currentTimeMillis();
//collecting data property statements
if(!prohibited){
//ALLTEXT, all of the 'full text'
StringBuffer allTextValue = new StringBuffer();
allTextValue.append("");
allTextValue.append(" ");
allTextValue.append(((t=ind.getName()) == null)?"":t);
allTextValue.append(" ");
allTextValue.append(((t=ind.getAnchor()) == null)?"":t);
allTextValue.append(" ");
allTextValue.append(classPublicNames.toString());
List<DataPropertyStatement> dataPropertyStatements = ind.getDataPropertyStatements();
if (dataPropertyStatements != null) {
Iterator<DataPropertyStatement> dataPropertyStmtIter = dataPropertyStatements.iterator();
while (dataPropertyStmtIter.hasNext()) {
DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next();
allTextValue.append(" ");
allTextValue.append(((t=dataPropertyStmt.getData()) == null)?"":t);
}
}
allTextValue.append(objectNames.toString());
log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements));
String alltext = allTextValue.toString();
doc.addField(term.ALLTEXT, alltext);
doc.addField(term.ALLTEXTUNSTEMMED, alltext);
doc.addField(term.ALLTEXT_PHONETIC, alltext,PHONETIC_BOOST);
//run the document modifiers
if( documentModifiers != null && !documentModifiers.isEmpty()){
doc.addField(term.targetInfo,"");
for(DocumentModifier modifier: documentModifiers){
modifier.modifyDocument(ind, doc, addUri);
}
}
}
return doc;
}
@Override
public Object getIndexId(Object obj) {
throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented");
}
@Override
public Object translate(Object obj) throws IndexingException {
return luceneToSolr.translate( entityToLucene.translate( obj ) );
/**
 * Builds a bare {@link Individual} from a search hit.
 *
 * Only the URI is populated, read from the hit's URI field.
 * NOTE(review): this accepts a Lucene Document, not a SolrDocument - confirm
 * that is what callers of this Solr translator actually pass.
 *
 * @param result the search hit
 * @return an individual carrying the hit's URI, or null when result is null
 *         or is not a Document
 */
public Individual unTranslate(Object result) {
    if( !(result instanceof Document) ){
        // instanceof is false for null, so this also covers a null hit
        return null;
    }
    String uri = ((Document) result).get(term.URI);
    Individual individual = new IndividualImpl();
    individual.setURI(uri);
    return individual;
}
@Override
public Object unTranslate(Object result) {
return luceneToSolr.unTranslate( result );
/**
 * Registers the local names of the classes that are treated as context nodes.
 * translate() returns null for (does not index) any individual that has one
 * of these classes.
 *
 * The target list is static while this method runs once per constructed
 * instance, so each name is added only if it is not already present;
 * otherwise every new instance would append another copy of all the names.
 */
private void fillContextNodes(){
    final String[] names = {
        "Role",
        "AttendeeRole",
        "ClinicalRole",
        "LeaderRole",
        "MemberRole",
        "OutreachProviderRole",
        "PresenterRole",
        "ResearcherRole",
        "InvestigatorRole",
        "CoPrincipalInvestigatorRole",
        "PrincipalInvestigatorRole",
        "ServiceProviderRole",
        "TeacherRole",

        "Position",
        "FacultyAdministrativePosition",
        "FacultyPosition",
        "LibrarianPosition",
        "Non-AcademicPosition",
        "Non-FacultyAcademicPosition",
        "PostdoctoralPosition",

        "AdvisingRelationship",
        "Authorship",
        "AcademicDegree"
    };
    for( String name : names ){
        if( !contextNodeClassNames.contains(name) ){
            contextNodeClassNames.add(name);
        }
    }
}
public static float NAME_BOOST = 2.0F;
public static float ALL_TEXT_BOOST = 2.5F;
public static float PHONETIC_BOOST = 0.1F;
}

View file

@ -1,62 +0,0 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
/**
 * Translates a Lucene {@link Document} into a {@link SolrInputDocument}, and
 * turns a {@link SolrDocument} search hit back into an {@link Individual}
 * that carries only the hit's URI.
 */
public class LuceneDocToSolrDoc implements Obj2DocIface {

    /** @return true when obj is a Lucene Document (false for null) */
    @Override
    public boolean canTranslate(Object obj) {
        return obj instanceof Document;
    }

    /** @return true when result is a SolrDocument (false for null) */
    @Override
    public boolean canUnTranslate(Object result) {
        return result instanceof SolrDocument;
    }

    /** Always returns null. */
    @Override
    public Object getIndexId(Object obj) {
        //"this method isn't useful for solr"
        return null;
    }

    /**
     * Copies every field of a Lucene document, by name and string value, into
     * a new Solr input document.
     *
     * @param obj the Lucene Document to convert; may be null
     * @return the Solr input document, or null when obj is null, so that a
     *         caller's "nothing to index" null check keeps working
     * @throws IndexingException when obj is non-null but not a Lucene Document
     */
    @Override
    public Object translate(Object obj) throws IndexingException {
        if( obj == null ){
            // No document was produced upstream; pass that on rather than
            // failing with a NullPointerException.
            return null;
        }
        if( !canTranslate(obj) ){
            throw new IndexingException("LuceneDocToSolrDoc: cannot translate object of type "
                    + obj.getClass().getName());
        }
        Document luceneDoc = (Document)obj;
        SolrInputDocument solrDoc = new SolrInputDocument();

        for( Object f : luceneDoc.getFields()){
            Field field = (Field)f;
            solrDoc.addField( field.name(), field.stringValue() );
        }
        return solrDoc;
    }

    /**
     * Builds a bare Individual from a Solr search hit.
     *
     * @param result the search hit
     * @return an individual with only its URI set, or null when result is
     *         null or is not a SolrDocument
     */
    @Override
    public Object unTranslate(Object result) {
        if( !canUnTranslate(result) ){
            return null;
        }
        SolrDocument hit = (SolrDocument)result;
        String id = (String) hit.getFieldValue(Entity2LuceneDoc.term.URI);
        Individual ind = new IndividualImpl();
        ind.setURI(id);
        return ind;
    }
}

View file

@ -6,40 +6,39 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrInputDocument;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface;
import edu.cornell.mannlib.vitro.webapp.search.solr.CalculateParameters;
public class SolrIndexer implements IndexerIface {
private final static Log log = LogFactory.getLog(SolrIndexer.class);
protected SolrServer server;
protected boolean indexing;
protected List<Obj2DocIface> obj2DocList;
protected HashSet<String> urisIndexed;
protected boolean indexing;
protected HashSet<String> urisIndexed;
protected IndividualToSolrDocument individualToSolrDoc;
public SolrIndexer( SolrServer server, List<Obj2DocIface> o2d){
public SolrIndexer( SolrServer server, IndividualToSolrDocument indToDoc){
this.server = server;
this.obj2DocList = o2d;
this.individualToSolrDoc = indToDoc;
}
@Override
public synchronized void index(Individual ind, boolean newDoc) throws IndexingException {
public void index(Individual ind) throws IndexingException {
if( ! indexing )
throw new IndexingException("SolrIndexer: must call " +
"startIndexing() before index().");
"startIndexing() before index().");
if( ind == null )
log.debug("Individual to index was null, ignoring.");
@ -49,38 +48,31 @@ public class SolrIndexer implements IndexerIface {
log.debug("already indexed " + ind.getURI() );
return;
}else{
urisIndexed.add(ind.getURI());
log.debug("indexing " + ind.getURI());
Iterator<Obj2DocIface> it = getObj2DocList().iterator();
while (it.hasNext()) {
Obj2DocIface obj2doc = (Obj2DocIface) it.next();
if (obj2doc.canTranslate(ind)) {
SolrInputDocument solrDoc = (SolrInputDocument) obj2doc.translate(ind);
if( solrDoc != null){
//sending each doc individually is inefficient
Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
docs.add( solrDoc );
server.add( docs );
// if( !newDoc ){
// server.add( docs );
// log.debug("updated " + ind.getName() + " " + ind.getURI());
// }else{
// server.add( docs );
// log.debug("added " + ind.getName() + " " + ind.getURI());
// }
}else{
log.debug("removing from index " + ind.getURI());
//writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
}
}
}
SolrInputDocument solrDoc = null;
synchronized(this){
urisIndexed.add(ind.getURI());
}
log.debug("indexing " + ind.getURI());
// synchronized(individualToSolrDoc){
solrDoc = individualToSolrDoc.translate(ind);
// }
if( solrDoc != null){
//sending each doc individually is inefficient
// Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
// docs.add( solrDoc );
UpdateResponse res = server.add( solrDoc );
log.debug("response after adding docs to server: "+ res);
}else{
log.debug("removing from index " + ind.getURI());
//TODO: how do we delete document?
//writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
}
}
} catch (IOException ex) {
throw new IndexingException(ex.getMessage());
} catch (SolrServerException ex) {
throw new IndexingException(ex.getMessage());
}
}
}
@Override
@ -115,12 +107,12 @@ public class SolrIndexer implements IndexerIface {
public synchronized void addObj2Doc(Obj2DocIface o2d) {
if (o2d != null)
obj2DocList.add(o2d);
//no longer used
}
public synchronized List<Obj2DocIface> getObj2DocList() {
return obj2DocList;
//no longer used
return null;
}
@Override
@ -131,11 +123,21 @@ public class SolrIndexer implements IndexerIface {
@Override
public synchronized void endIndexing() {
try {
server.commit();
} catch (Exception e) {
UpdateResponse res = server.commit();
log.debug("Response after committing to server: "+ res );
} catch (SolrServerException e) {
log.error("Could not commit to solr server", e);
} catch(IOException e){
log.error("Could not commit to solr server", e);
}finally{
if(!individualToSolrDoc.documentModifiers.isEmpty()){
if(individualToSolrDoc.documentModifiers.get(0) instanceof CalculateParameters){
CalculateParameters c = (CalculateParameters) individualToSolrDoc.documentModifiers.get(0);
c.clearMap();
log.info("BetaMap cleared");
}
}
}
try {
server.optimize();
} catch (Exception e) {

View file

@ -14,6 +14,7 @@ import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.query.Dataset;
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
@ -21,14 +22,14 @@ import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
import edu.cornell.mannlib.vitro.webapp.dao.jena.JenaBaseDao;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.dao.jena.WebappDaoFactoryJena;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl;
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
@ -57,8 +58,8 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
}
CommonsHttpSolrServer server;
server = new CommonsHttpSolrServer( solrServerUrl );
server.setSoTimeout(1000); // socket read timeout
server.setConnectionTimeout(100);
server.setSoTimeout(10000); // socket read timeout
server.setConnectionTimeout(10000);
server.setDefaultMaxConnectionsPerHost(100);
server.setMaxTotalConnections(100);
server.setMaxRetries(1);
@ -67,15 +68,24 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
/* setup the individual to solr doc translation */
//first we need a ent2luceneDoc translator
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc(
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
new IndividualProhibitedFromSearch(context) );
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc );
List<Obj2DocIface> o2d = new ArrayList<Obj2DocIface>();
o2d.add(indToSolrDoc);
OntModel abox = ModelContext.getBaseOntModelSelector(context).getABoxModel();
OntModel inferences = (OntModel)context.getAttribute( JenaBaseDao.INFERENCE_ONT_MODEL_ATTRIBUTE_NAME);
Dataset dataset = WebappDaoFactoryJena.makeInMemoryDataset(abox, inferences);
List<DocumentModifier> modifiers = new ArrayList<DocumentModifier>();
// modifiers.add(new CalculateParameters(ModelContext.getJenaOntModel(context)));
modifiers.add(new CalculateParameters(dataset));
modifiers.add(new ContextNodeFields(dataset));
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument(
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
new IndividualProhibitedFromSearchImpl(context),
modifiers);
/* setup solr indexer */
SolrIndexer solrIndexer = new SolrIndexer(server, o2d);
SolrIndexer solrIndexer = new SolrIndexer(server, indToSolrDoc);
if( solrIndexer.isIndexEmpty() ){
log.info("solr index is empty, requesting rebuild");
sce.getServletContext().setAttribute(LuceneSetup.INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE);