diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java index 02c4ce64b..5d5f4cf59 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java @@ -552,7 +552,7 @@ public class SearchQueryHandler { RDFNode linkedAuthor = soln.get("linkedAuthor"); if(linkedAuthor != null){ - propertyValues.append(" publications " + linkedAuthor.toString()); + propertyValues.append(" publications " + linkedAuthor.toString() + " publications "); }else{ log.debug("linkedAuthor is null "); } @@ -587,7 +587,7 @@ public class SearchQueryHandler { RDFNode features = soln.get("features"); if(features != null){ - propertyValues.append(" publications " + features.toString()); + propertyValues.append(" publications " + features.toString() + " publications "); }else{ log.debug("features is null "); } @@ -604,18 +604,7 @@ public class SearchQueryHandler { return propertyValues.toString(); } - - public float calculateBeta(String uri){ - float beta=0; - RDFNode node = (Resource) fullModel.getResource(uri); - StmtIterator stmtItr = fullModel.listStatements((Resource)null, (Property)null,node); - int Conn = stmtItr.toList().size(); - beta = (float)Conn/totalInd; - beta *= 100; - beta += 1; - return beta; - } - + public String[] getAdjacentNodes(String uri,boolean isPerson){ List queryList = new ArrayList(); @@ -705,7 +694,7 @@ public class SearchQueryHandler { coauthor = soln.get("coauthor"); if(coauthor!=null){ - coauthorNames.add(" co-authors " + coauthor.toString()); + coauthorNames.add(" co-authors " + coauthor.toString() + " co-authors "); } } }catch(Exception e){ diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculateBeta.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculateBeta.java deleted file mode 100644 index ac8a165c3..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculateBeta.java +++ /dev/null @@ -1,84 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.search.solr; - -import java.util.Hashtable; -import java.util.Map; - -import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.SolrInputField; - -import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.Property; -import com.hp.hpl.jena.rdf.model.RDFNode; -import com.hp.hpl.jena.rdf.model.Resource; -import com.hp.hpl.jena.rdf.model.StmtIterator; - -import edu.cornell.mannlib.vitro.webapp.beans.Individual; -import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames; - -public class CalculateBeta implements DocumentModifier{ - private static final String[] fieldsToAddBetaTo = { - VitroTermNames.NAME_RAW, - VitroTermNames.NAME_LOWERCASE, - VitroTermNames.NAME_UNSTEMMED, - VitroTermNames.NAME_STEMMED - }; - - private static final String[] fieldsToMultiplyBetaBy = { - VitroTermNames.ALLTEXT, - VitroTermNames.ALLTEXTUNSTEMMED, - }; - - Model fullModel; - int totalInd; - public static Map betas = new Hashtable(); - - public CalculateBeta(OntModel fullModel){ - this.fullModel=fullModel; - this.totalInd = fullModel.listIndividuals().toList().size(); - } - - @Override - public void modifyDocument(Individual individual, SolrInputDocument doc) { - // TODO Auto-generated method stub - - // get beta value - float beta = 0; - if(betas.containsKey(individual.getURI())){ - beta = betas.get(individual.getURI()); - }else{ - beta = calculateBeta(individual.getURI()); // or calculate & put in map - betas.put(individual.getURI(), beta); - } - //doc.addField(term.BETA,beta); - - for(String term: fieldsToAddBetaTo){ - SolrInputField f = doc.getField( term ); - f.setBoost( beta + f.getBoost() ); - } - - for(String term: fieldsToMultiplyBetaBy){ - SolrInputField f = doc.getField( term ); - f.setBoost( beta * f.getBoost() ); - } - - doc.setDocumentBoost( beta * doc.getDocumentBoost() ); - } - - public float calculateBeta(String uri){ - float beta=0; - RDFNode node = (Resource) fullModel.getResource(uri); - StmtIterator stmtItr = fullModel.listStatements((Resource)null, (Property)null,node); - int Conn = stmtItr.toList().size(); - beta = (float)Conn/totalInd; - beta *= 100; - beta += 1; - return beta; - } - - public Float getBeta(String uri){ - return betas.get(uri); - } -} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculateParameters.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculateParameters.java new file mode 100644 index 000000000..7c98aee91 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculateParameters.java @@ -0,0 +1,278 @@ +package edu.cornell.mannlib.vitro.webapp.search.solr; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Hashtable; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.StringTokenizer; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; + +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecution; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.QuerySolutionMap; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.Syntax; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.ResourceFactory; +import com.hp.hpl.jena.rdf.model.StmtIterator; +import com.hp.hpl.jena.shared.Lock; +import com.hp.hpl.jena.ontology.OntModel; + +import edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames; +import edu.cornell.mannlib.vitro.webapp.search.beans.SearchQueryHandler; + +public class CalculateParameters implements DocumentModifier { + + Model fullModel; + int totalInd; + public static Map betaMap = new Hashtable(); + private float phi; + private static final String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix core: " + + " prefix foaf: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + private static Log log = LogFactory.getLog(CalculateParameters.class); + + private static final String[] fieldsToAddBetaTo = { + VitroTermNames.NAME_RAW, + VitroTermNames.NAME_LOWERCASE, + VitroTermNames.NAME_UNSTEMMED, + VitroTermNames.NAME_STEMMED + }; + + private static final String[] fieldsToMultiplyBetaBy = { + VitroTermNames.ALLTEXT, + VitroTermNames.ALLTEXTUNSTEMMED, + }; + + public CalculateParameters(OntModel fullModel){ + this.fullModel=fullModel; + this.totalInd = fullModel.listIndividuals().toList().size(); + } + + public float calculateBeta(String uri){ + float beta=0; + RDFNode node = (Resource) fullModel.getResource(uri); + StmtIterator stmtItr = fullModel.listStatements((Resource)null, (Property)null,node); + int Conn = stmtItr.toList().size(); + beta = (float)Conn/totalInd; + beta *= 100; + beta += 1; + return beta; + } + + public float calculatePhi(StringBuffer adjNodes){ + + StringTokenizer nodes = new StringTokenizer(adjNodes.toString()," "); + String uri=null; + float beta=0; + int size=0; + phi = 0.1F; + while(nodes.hasMoreTokens()){ + size++; + uri = nodes.nextToken(); + if(hasBeta(uri)){ // get if already calculated + phi += getBeta(uri); + }else{ // query if not calculated and put in map + beta = calculateBeta(uri); + setBeta(uri, beta); + phi+=beta; + } + } + if(size>0) + phi = (float)phi/size; + else + phi = 1; + return phi; + } + + public Float getBeta(String uri){ + return betaMap.get(uri); + } + public float getPhi(){ + return phi; + } + public boolean hasBeta(String uri){ + return betaMap.containsKey(uri); + } + public void setBeta(String uri, float beta){ + betaMap.put(uri, beta); + } + + public String[] getAdjacentNodes(String uri,boolean isPerson){ + + List queryList = new ArrayList(); + Set adjacentNodes = new HashSet(); + Set coauthorNames = new HashSet(); + String[] info = new String[]{"",""}; + StringBuffer adjacentNodesConcat = new StringBuffer(); + StringBuffer coauthorBuff = new StringBuffer(); + adjacentNodesConcat.append(""); + coauthorBuff.append(""); + + queryList.add(prefix + + " SELECT ?adjobj (str(?adjobjLabel) as ?coauthor) " + + " WHERE { " + + " ?uri rdf:type . " + + " ?uri ?prop ?obj . " + + " ?obj rdf:type . " + + " ?obj ?prop2 ?obj2 . " + + " ?obj2 rdf:type . " + + " ?obj2 ?prop3 ?obj3 . " + + " ?obj3 rdf:type . " + + " ?obj3 ?prop4 ?adjobj . " + + " ?adjobj rdfs:label ?adjobjLabel . " + + " ?adjobj rdf:type . " + + + " FILTER (?prop !=rdf:type) . " + + " FILTER (?prop2!=rdf:type) . " + + " FILTER (?prop3!=rdf:type) . " + + " FILTER (?prop4!=rdf:type) . " + + " FILTER (?adjobj != ?uri) . " + + "}"); + + queryList.add(prefix + + " SELECT ?adjobj " + + " WHERE{ " + + + " ?uri rdf:type foaf:Agent . " + + " ?uri ?prop ?obj . " + + " ?obj ?prop2 ?adjobj . " + + + + " FILTER (?prop !=rdf:type) . " + + " FILTER isURI(?obj) . " + + + " FILTER (?prop2!=rdf:type) . " + + " FILTER (?adjobj != ?uri) . " + + " FILTER isURI(?adjobj) . " + + + " { ?adjobj rdf:type . } " + + " UNION " + + " { ?adjobj rdf:type . } " + + " UNION " + + " { ?adjobj rdf:type . } " + + " UNION " + + " { ?adjobj rdf:type . } ." + + "}"); + + Query query; + + QuerySolution soln; + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + Iterator queryItr = queryList.iterator(); + + fullModel.enterCriticalSection(Lock.READ); + Resource adjacentIndividual = null; + RDFNode coauthor = null; + try{ + while(queryItr.hasNext()){ + if(!isPerson){ + queryItr.next(); // we don't want first query to execute if the ind is not a person. + } + query = QueryFactory.create(queryItr.next(),Syntax.syntaxARQ); + QueryExecution qexec = QueryExecutionFactory.create(query,fullModel,initialBinding); + try{ + ResultSet results = qexec.execSelect(); + while(results.hasNext()){ + soln = results.nextSolution(); + + adjacentIndividual = (Resource)soln.get("adjobj"); + if(adjacentIndividual!=null){ + adjacentNodes.add(adjacentIndividual.getURI()); + } + + coauthor = soln.get("coauthor"); + if(coauthor!=null){ + coauthorNames.add(" co-authors " + coauthor.toString() + " co-authors "); + } + } + }catch(Exception e){ + log.error("Error found in getAdjacentNodes method of SearchQueryHandler"); + }finally{ + qexec.close(); + } + } + queryList = null; + Iterator itr = adjacentNodes.iterator(); + while(itr.hasNext()){ + adjacentNodesConcat.append(itr.next() + " "); + } + + info[0] = adjacentNodesConcat.toString(); + + itr = coauthorNames.iterator(); + while(itr.hasNext()){ + coauthorBuff.append(itr.next()); + } + + info[1] = coauthorBuff.toString(); + + } + catch(Throwable t){ + log.error(t,t); + }finally{ + fullModel.leaveCriticalSection(); + adjacentNodes = null; + adjacentNodesConcat = null; + coauthorBuff = null; + } + return info; + } + + @Override + public void modifyDocument(Individual individual, SolrInputDocument doc) { + // TODO Auto-generated method stub + // calculate beta value. + float beta = 0; + String uri = individual.getURI(); + if(hasBeta(uri)){ + beta = getBeta(uri); + }else{ + beta = calculateBeta(uri); // or calculate & put in map + setBeta(uri,beta); + } + + boolean isPerson = (IndividualToSolrDocument.superClassNames.contains("Person")) ? true : false ; + String adjInfo[] = getAdjacentNodes(uri,isPerson); + StringBuffer info = new StringBuffer(); + info.append(adjInfo[0]); + info.append(IndividualToSolrDocument.addUri.toString()); + phi = calculatePhi(info); + + for(String term: fieldsToAddBetaTo){ + SolrInputField f = doc.getField( term ); + f.setBoost( getBeta(uri) + phi + IndividualToSolrDocument.NAME_BOOST); + } + + for(String term: fieldsToMultiplyBetaBy){ + SolrInputField f = doc.getField( term ); + f.setBoost( getBeta(uri)*phi*IndividualToSolrDocument.ALL_TEXT_BOOST); + } + doc.setDocumentBoost(getBeta(uri)*phi*IndividualToSolrDocument.ALL_TEXT_BOOST); + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculatePhi.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculatePhi.java deleted file mode 100644 index 245ed7fec..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculatePhi.java +++ /dev/null @@ -1,22 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.search.solr; - -import org.apache.solr.common.SolrInputDocument; - -import edu.cornell.mannlib.vitro.webapp.beans.Individual; - -public class CalculatePhi implements DocumentModifier{ - CalculateBeta betas; - - // maybe Phi needs Beta? - public CalculatePhi(CalculateBeta betas){ - this.betas = betas; - } - - @Override - public void modifyDocument(Individual individual, SolrInputDocument doc) { - // TODO Auto-generated method stub - } - -} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java index 53976a5e4..a52dc14f6 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java @@ -52,20 +52,19 @@ public class IndividualToSolrDocument implements Obj2DocIface { private IndividualProhibitedFromSearch individualProhibitedFromSearch; private SearchQueryHandler searchQueryHandler; + + public static ArrayList superClassNames = null; + + public static StringBuffer addUri = null; private List documentModifiers = new ArrayList(); - - private static List contextNodeClassNames = new ArrayList(); public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch, IndividualProhibitedFromSearch individualProhibitedFromSearch, SearchQueryHandler searchQueryHandler){ - this.classesProhibitedFromSearch = classesProhibitedFromSearch; - this.individualProhibitedFromSearch = individualProhibitedFromSearch; - this.searchQueryHandler = searchQueryHandler; - fillContextNodes(); + this(classesProhibitedFromSearch,individualProhibitedFromSearch,searchQueryHandler,null); } public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch, @@ -118,7 +117,7 @@ public class IndividualToSolrDocument implements Obj2DocIface { // Types and classgroups boolean prohibited = false; List vclasses = ent.getVClasses(false); - ArrayList superClassNames = new ArrayList(); + superClassNames = new ArrayList(); String superLclName = null; long tClassgroup = System.currentTimeMillis(); for(VClass clz : vclasses){ @@ -193,7 +192,7 @@ public class IndividualToSolrDocument implements Obj2DocIface { StringBuffer objectNames = new StringBuffer(); objectNames.append(""); String t=null; - StringBuffer addUri = new StringBuffer(); + addUri = new StringBuffer(); addUri.append(""); List objectPropertyStatements = ent.getObjectPropertyStatements(); if (objectPropertyStatements != null) { @@ -214,21 +213,17 @@ public class IndividualToSolrDocument implements Obj2DocIface { } // adding PHI value - boolean isPerson = (superClassNames.contains("Person")) ? true : false ; - String adjInfo[] = searchQueryHandler.getAdjacentNodes(uri,isPerson); - StringBuffer info = new StringBuffer(); - info.append(adjInfo[0]); - info.append(addUri.toString()); + //doc.addField(term.ADJACENT_NODES,info.toString()); // adding adjacent nodes - float phi = calculatePHI(info); + //doc.addField(term.PHI, phi); // adding phi value - doc.addField(term.NAME_RAW, value, NAME_BOOST+phi); - doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST+phi); - doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST+phi); - doc.addField(term.NAME_STEMMED, value, NAME_BOOST+phi); + doc.addField(term.NAME_RAW, value, NAME_BOOST); + doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST); + doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST); + doc.addField(term.NAME_STEMMED, value, NAME_BOOST); doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST); long tContextNodes = System.currentTimeMillis(); @@ -237,24 +232,7 @@ public class IndividualToSolrDocument implements Obj2DocIface { StringBuffer targetInfo = new StringBuffer(); targetInfo.append(""); - if(superClassNames.contains("Agent")){ - objectNames.append(" "); - objectNames.append(searchQueryHandler.getPropertiesAssociatedWithEducationalTraining(ent.getURI())); - objectNames.append(" "); - objectNames.append(searchQueryHandler.getPropertiesAssociatedWithRole(ent.getURI())); - objectNames.append(" "); - objectNames.append(searchQueryHandler.getPropertiesAssociatedWithPosition(ent.getURI())); - objectNames.append(" "); - objectNames.append(searchQueryHandler.getPropertiesAssociatedWithRelationship(ent.getURI())); - objectNames.append(" "); - objectNames.append(searchQueryHandler.getPropertiesAssociatedWithAwardReceipt(ent.getURI())); - } - if(superClassNames.contains("InformationResource")){ - targetInfo.append(" "); - targetInfo.append(searchQueryHandler.getPropertiesAssociatedWithInformationResource(ent.getURI())); - } - - + doc.addField(term.targetInfo, targetInfo.toString() + adjInfo[1]); log.debug("time to fire contextnode queries and include them in the index: " + Long.toString(System.currentTimeMillis() - tContextNodes)); @@ -263,8 +241,8 @@ public class IndividualToSolrDocument implements Obj2DocIface { long tMoniker = System.currentTimeMillis(); //boost for entity - // if(ent.getSearchBoost() != null && ent.getSearchBoost() != 0) - // doc.setDocumentBoost(ent.getSearchBoost()); + if(ent.getSearchBoost() != null && ent.getSearchBoost() != 0) + doc.setDocumentBoost(ent.getSearchBoost()); //thumbnail try{ @@ -314,10 +292,9 @@ public class IndividualToSolrDocument implements Obj2DocIface { log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements)); String alltext = allTextValue.toString(); - doc.addField(term.ALLTEXT, alltext, 2.5F*phi); - doc.addField(term.ALLTEXTUNSTEMMED, alltext, 2.5F*phi); + doc.addField(term.ALLTEXT, alltext, ALL_TEXT_BOOST); + doc.addField(term.ALLTEXTUNSTEMMED, alltext, ALL_TEXT_BOOST); doc.addField(term.ALLTEXT_PHONETIC, alltext, PHONETIC_BOOST); - doc.setDocumentBoost(2.5F*phi); //run the document modifiers if( documentModifiers != null ){ @@ -334,30 +311,7 @@ public class IndividualToSolrDocument implements Obj2DocIface { * Method for calculation of PHI for a doc. */ - public float calculatePHI(StringBuffer adjNodes){ - - StringTokenizer nodes = new StringTokenizer(adjNodes.toString()," "); - String uri=null; - float phi=0.1F; - float beta=0; - int size=0; - while(nodes.hasMoreTokens()){ - size++; - uri = nodes.nextToken(); - if(betas.containsKey(uri)){ // get if already calculated - phi += betas.get(uri); - }else{ // query if not calculated and put in map - beta = searchQueryHandler.calculateBeta(uri); - betas.put(uri, beta); - phi+=beta; - } - } - if(size>0) - phi = (float)phi/size; - else - phi = 1; - return phi; - } + // public IndividualToSolrDocument(Entity2LuceneDoc e2d){ //// entityToLucene = e2d; @@ -423,6 +377,7 @@ public class IndividualToSolrDocument implements Obj2DocIface { public static float NAME_BOOST = 2.0F; + public static float ALL_TEXT_BOOST = 2.5F; public static float PHONETIC_BOOST = 0.1F;