This commit is contained in:
anupsawant 2011-05-31 22:43:45 +00:00
parent e7fc65ab58
commit adac7e86a0
5 changed files with 302 additions and 186 deletions

View file

@ -552,7 +552,7 @@ public class SearchQueryHandler {
RDFNode linkedAuthor = soln.get("linkedAuthor"); RDFNode linkedAuthor = soln.get("linkedAuthor");
if(linkedAuthor != null){ if(linkedAuthor != null){
propertyValues.append(" publications " + linkedAuthor.toString()); propertyValues.append(" publications " + linkedAuthor.toString() + " publications ");
}else{ }else{
log.debug("linkedAuthor is null "); log.debug("linkedAuthor is null ");
} }
@ -587,7 +587,7 @@ public class SearchQueryHandler {
RDFNode features = soln.get("features"); RDFNode features = soln.get("features");
if(features != null){ if(features != null){
propertyValues.append(" publications " + features.toString()); propertyValues.append(" publications " + features.toString() + " publications ");
}else{ }else{
log.debug("features is null "); log.debug("features is null ");
} }
@ -605,17 +605,6 @@ public class SearchQueryHandler {
} }
/**
 * Calculates the beta value of the resource identified by {@code uri}.
 *
 * Beta is {@code 1 + 100 * connections / totalInd}, where {@code connections}
 * is the number of statements in {@code fullModel} that have the resource as
 * their object.
 *
 * @param uri URI of the resource to score
 * @return the beta value; 1 when {@code totalInd} is not positive, because
 *         the ratio is undefined in that case
 */
public float calculateBeta(String uri){
    // Float division by zero does not throw in Java; it silently produces
    // NaN or Infinity, which must not end up as a boost value.
    if (totalInd <= 0) {
        return 1;
    }
    RDFNode node = fullModel.getResource(uri);
    // Every statement (any subject, any predicate) that points at the node.
    StmtIterator stmtItr = fullModel.listStatements((Resource)null, (Property)null, node);
    int connections = stmtItr.toList().size();
    return ((float) connections / totalInd) * 100 + 1;
}
public String[] getAdjacentNodes(String uri,boolean isPerson){ public String[] getAdjacentNodes(String uri,boolean isPerson){
List<String> queryList = new ArrayList<String>(); List<String> queryList = new ArrayList<String>();
@ -705,7 +694,7 @@ public class SearchQueryHandler {
coauthor = soln.get("coauthor"); coauthor = soln.get("coauthor");
if(coauthor!=null){ if(coauthor!=null){
coauthorNames.add(" co-authors " + coauthor.toString()); coauthorNames.add(" co-authors " + coauthor.toString() + " co-authors ");
} }
} }
}catch(Exception e){ }catch(Exception e){

View file

@ -1,84 +0,0 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.util.Hashtable;
import java.util.Map;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
/**
 * {@link DocumentModifier} that adjusts Solr boosts by an individual's beta value.
 *
 * Beta is {@code 1 + 100 * connections / totalInd}, where {@code connections}
 * is the number of statements in the model that have the individual as their
 * object and {@code totalInd} is the number of individuals counted when this
 * object was constructed. Calculated values are cached in {@link #betas}.
 */
public class CalculateBeta implements DocumentModifier{

    /** Fields whose boost has beta added to it. */
    private static final String[] fieldsToAddBetaTo = {
        VitroTermNames.NAME_RAW,
        VitroTermNames.NAME_LOWERCASE,
        VitroTermNames.NAME_UNSTEMMED,
        VitroTermNames.NAME_STEMMED
    };

    /** Fields whose boost is multiplied by beta. */
    private static final String[] fieldsToMultiplyBetaBy = {
        VitroTermNames.ALLTEXT,
        VitroTermNames.ALLTEXTUNSTEMMED,
    };

    Model fullModel;
    int totalInd;

    /** Cache of beta values keyed by individual URI; shared by all instances. */
    public static Map<String,Float> betas = new Hashtable<String,Float>();

    /**
     * @param fullModel model used for counting individuals and connections
     */
    public CalculateBeta(OntModel fullModel){
        this.fullModel=fullModel;
        this.totalInd = fullModel.listIndividuals().toList().size();
    }

    /**
     * Applies the individual's beta to the name fields (added), the all-text
     * fields (multiplied) and the document boost (multiplied). Fields that
     * are not present in the document are left out.
     *
     * @param individual the individual the document was built from
     * @param doc the Solr document whose boosts are adjusted in place
     */
    @Override
    public void modifyDocument(Individual individual, SolrInputDocument doc) {
        String uri = individual.getURI();

        float beta;
        if (betas.containsKey(uri)) {
            beta = betas.get(uri);
        } else {
            beta = calculateBeta(uri);
            betas.put(uri, beta);
        }

        for (String term : fieldsToAddBetaTo) {
            SolrInputField f = doc.getField(term);
            // getField returns null for a field that was never added.
            if (f != null) {
                f.setBoost(beta + f.getBoost());
            }
        }

        for (String term : fieldsToMultiplyBetaBy) {
            SolrInputField f = doc.getField(term);
            if (f != null) {
                f.setBoost(beta * f.getBoost());
            }
        }

        doc.setDocumentBoost(beta * doc.getDocumentBoost());
    }

    /**
     * Calculates beta for the resource identified by {@code uri}.
     *
     * @param uri URI of the resource to score
     * @return the beta value; 1 when {@code totalInd} is not positive, because
     *         the ratio is undefined in that case
     */
    public float calculateBeta(String uri){
        // Float division by zero silently produces NaN or Infinity, which
        // must not end up as a boost value.
        if (totalInd <= 0) {
            return 1;
        }
        RDFNode node = fullModel.getResource(uri);
        // Every statement (any subject, any predicate) that points at the node.
        StmtIterator stmtItr = fullModel.listStatements((Resource)null, (Property)null, node);
        int connections = stmtItr.toList().size();
        return ((float) connections / totalInd) * 100 + 1;
    }

    /**
     * @param uri URI to look up
     * @return the cached beta for {@code uri}, or {@code null} if none is cached
     */
    public Float getBeta(String uri){
        return betas.get(uri);
    }
}

View file

@ -0,0 +1,278 @@
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.QuerySolutionMap;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.shared.Lock;
import com.hp.hpl.jena.ontology.OntModel;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
import edu.cornell.mannlib.vitro.webapp.search.beans.SearchQueryHandler;
/**
 * {@link DocumentModifier} that sets Solr field and document boosts from two
 * values derived from the RDF graph:
 * <ul>
 * <li><b>beta</b> of a resource: {@code 1 + 100 * connections / totalInd},
 *     where {@code connections} is the number of statements that have the
 *     resource as their object;</li>
 * <li><b>phi</b> of a document: {@code (0.1 + sum of the betas of its
 *     adjacent nodes) / number of adjacent nodes}, or 1 when it has none.</li>
 * </ul>
 * Beta values are cached in the static {@link #betaMap}.
 */
public class CalculateParameters implements DocumentModifier {

    Model fullModel;
    int totalInd;

    /** Cache of beta values keyed by URI; shared by all instances. */
    public static Map<String,Float> betaMap = new Hashtable<String,Float>();

    /** Result of the most recent {@link #calculatePhi(StringBuffer)} call. */
    private float phi;

    private static final String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
        + " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
        + " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
        + " prefix core: <http://vivoweb.org/ontology/core#> "
        + " prefix foaf: <http://xmlns.com/foaf/0.1/> "
        + " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
        + " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
        + " prefix bibo: <http://purl.org/ontology/bibo/> ";

    /**
     * Person -> Relationship -> InformationResource -> Relationship -> Person.
     * Selects the person at the far end and that person's label.
     */
    private static final String COAUTHOR_QUERY = prefix +
        " SELECT ?adjobj (str(?adjobjLabel) as ?coauthor) " +
        " WHERE { " +
        " ?uri rdf:type <http://xmlns.com/foaf/0.1/Person> . " +
        " ?uri ?prop ?obj . " +
        " ?obj rdf:type <http://vivoweb.org/ontology/core#Relationship> . " +
        " ?obj ?prop2 ?obj2 . " +
        " ?obj2 rdf:type <http://vivoweb.org/ontology/core#InformationResource> . " +
        " ?obj2 ?prop3 ?obj3 . " +
        " ?obj3 rdf:type <http://vivoweb.org/ontology/core#Relationship> . " +
        " ?obj3 ?prop4 ?adjobj . " +
        " ?adjobj rdfs:label ?adjobjLabel . " +
        " ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Person> . " +
        " FILTER (?prop !=rdf:type) . " +
        " FILTER (?prop2!=rdf:type) . " +
        " FILTER (?prop3!=rdf:type) . " +
        " FILTER (?prop4!=rdf:type) . " +
        " FILTER (?adjobj != ?uri) . " +
        "}";

    /**
     * Agent -> intermediate URI -> Organization, Person, InformationResource
     * or Location. Selects the resource at the far end.
     */
    private static final String AGENT_ADJACENT_QUERY = prefix +
        " SELECT ?adjobj " +
        " WHERE{ " +
        " ?uri rdf:type foaf:Agent . " +
        " ?uri ?prop ?obj . " +
        " ?obj ?prop2 ?adjobj . " +
        " FILTER (?prop !=rdf:type) . " +
        " FILTER isURI(?obj) . " +
        " FILTER (?prop2!=rdf:type) . " +
        " FILTER (?adjobj != ?uri) . " +
        " FILTER isURI(?adjobj) . " +
        " { ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Organization> . } " +
        " UNION " +
        " { ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Person> . } " +
        " UNION " +
        " { ?adjobj rdf:type <http://vivoweb.org/ontology/core#InformationResource> . } " +
        " UNION " +
        " { ?adjobj rdf:type <http://vivoweb.org/ontology/core#Location> . } ." +
        "}";

    private static final Log log = LogFactory.getLog(CalculateParameters.class);

    /** Fields whose boost becomes beta + phi + NAME_BOOST. */
    private static final String[] fieldsToAddBetaTo = {
        VitroTermNames.NAME_RAW,
        VitroTermNames.NAME_LOWERCASE,
        VitroTermNames.NAME_UNSTEMMED,
        VitroTermNames.NAME_STEMMED
    };

    /** Fields whose boost becomes beta * phi * ALL_TEXT_BOOST. */
    private static final String[] fieldsToMultiplyBetaBy = {
        VitroTermNames.ALLTEXT,
        VitroTermNames.ALLTEXTUNSTEMMED,
    };

    /**
     * @param fullModel model used for counting individuals and connections
     *                  and for running the adjacency queries
     */
    public CalculateParameters(OntModel fullModel){
        this.fullModel=fullModel;
        this.totalInd = fullModel.listIndividuals().toList().size();
    }

    /**
     * Calculates beta for the resource identified by {@code uri}. The result
     * is not cached here; see {@link #setBeta(String, float)}.
     *
     * @param uri URI of the resource to score
     * @return the beta value; 1 when {@code totalInd} is not positive, because
     *         the ratio is undefined in that case
     */
    public float calculateBeta(String uri){
        // Float division by zero silently produces NaN or Infinity, which
        // must not end up as a boost value.
        if (totalInd <= 0) {
            return 1;
        }
        RDFNode node = fullModel.getResource(uri);
        // Every statement (any subject, any predicate) that points at the node.
        StmtIterator stmtItr = fullModel.listStatements((Resource)null, (Property)null, node);
        int connections = stmtItr.toList().size();
        return ((float) connections / totalInd) * 100 + 1;
    }

    /**
     * Calculates phi from a space-separated list of adjacent node URIs and
     * remembers it for {@link #getPhi()}. Betas that are not cached yet are
     * calculated and put into the cache.
     *
     * @param adjNodes space-separated URIs; {@code null} is treated as empty
     * @return {@code (0.1 + sum of betas) / number of URIs}, or 1 when there
     *         are no URIs
     */
    public float calculatePhi(StringBuffer adjNodes){
        float sum = 0.1F;
        int size = 0;

        if (adjNodes != null) {
            StringTokenizer nodes = new StringTokenizer(adjNodes.toString(), " ");
            while (nodes.hasMoreTokens()) {
                String uri = nodes.nextToken();
                size++;

                float beta;
                if (hasBeta(uri)) {
                    beta = getBeta(uri);
                } else {
                    beta = calculateBeta(uri);
                    setBeta(uri, beta);
                }
                sum += beta;
            }
        }

        phi = (size > 0) ? sum / size : 1;
        return phi;
    }

    /**
     * @param uri URI to look up
     * @return the cached beta for {@code uri}, or {@code null} if none is cached
     */
    public Float getBeta(String uri){
        return betaMap.get(uri);
    }

    /**
     * @return the value computed by the most recent calculatePhi call on this
     *         instance
     */
    public float getPhi(){
        return phi;
    }

    /**
     * @param uri URI to look up
     * @return true if a beta value is cached for {@code uri}
     */
    public boolean hasBeta(String uri){
        return betaMap.containsKey(uri);
    }

    /**
     * Stores {@code beta} in the shared cache under {@code uri}.
     */
    public void setBeta(String uri, float beta){
        betaMap.put(uri, beta);
    }

    /**
     * Finds the nodes adjacent to {@code uri} and, for persons, the labels of
     * their co-authors. The co-author query is only run when {@code isPerson}
     * is true; the general adjacency query is always run.
     *
     * Failures are logged and not rethrown. If a query fails while its results
     * are being read, the results collected so far are kept; any other failure
     * yields two empty strings.
     *
     * @param uri URI bound to {@code ?uri} in the queries
     * @param isPerson whether the individual is a person
     * @return a two-element array: [0] the adjacent node URIs, each followed
     *         by a space; [1] the co-author labels, each wrapped in
     *         {@code " co-authors "} markers
     */
    public String[] getAdjacentNodes(String uri,boolean isPerson){
        String[] info = new String[]{"",""};

        List<String> queries = new ArrayList<String>();
        if (isPerson) {
            queries.add(COAUTHOR_QUERY);
        }
        queries.add(AGENT_ADJACENT_QUERY);

        Set<String> adjacentNodes = new HashSet<String>();
        Set<String> coauthorNames = new HashSet<String>();

        QuerySolutionMap initialBinding = new QuerySolutionMap();
        initialBinding.add("uri", ResourceFactory.createResource(uri));

        fullModel.enterCriticalSection(Lock.READ);
        try{
            for (String queryString : queries) {
                Query query = QueryFactory.create(queryString, Syntax.syntaxARQ);
                QueryExecution qexec = QueryExecutionFactory.create(query, fullModel, initialBinding);
                try{
                    ResultSet results = qexec.execSelect();
                    while (results.hasNext()) {
                        QuerySolution soln = results.nextSolution();

                        RDFNode adjacent = soln.get("adjobj");
                        if (adjacent instanceof Resource) {
                            // Blank nodes have no URI; adding null here would
                            // later turn into the literal token "null".
                            String adjacentUri = ((Resource) adjacent).getURI();
                            if (adjacentUri != null) {
                                adjacentNodes.add(adjacentUri);
                            }
                        }

                        RDFNode coauthor = soln.get("coauthor");
                        if (coauthor != null) {
                            coauthorNames.add(" co-authors " + coauthor.toString() + " co-authors ");
                        }
                    }
                }catch(Exception e){
                    log.error("Error found in getAdjacentNodes method of CalculateParameters", e);
                }finally{
                    qexec.close();
                }
            }

            StringBuilder adjacentNodesConcat = new StringBuilder();
            for (String node : adjacentNodes) {
                adjacentNodesConcat.append(node).append(" ");
            }
            info[0] = adjacentNodesConcat.toString();

            StringBuilder coauthorBuff = new StringBuilder();
            for (String name : coauthorNames) {
                coauthorBuff.append(name);
            }
            info[1] = coauthorBuff.toString();
        }
        catch(Throwable t){
            // Best effort: indexing continues with whatever is in info.
            log.error(t,t);
        }finally{
            fullModel.leaveCriticalSection();
        }
        return info;
    }

    /**
     * Sets the boosts of the name fields, the all-text fields and the document
     * from the individual's beta and phi. Fields that are not present in the
     * document are left out.
     *
     * NOTE(review): this reads the static fields
     * {@code IndividualToSolrDocument.superClassNames} and
     * {@code IndividualToSolrDocument.addUri}, so it presumably has to run
     * right after that class has built the same document - confirm against
     * the caller.
     *
     * @param individual the individual the document was built from
     * @param doc the Solr document whose boosts are set in place
     */
    @Override
    public void modifyDocument(Individual individual, SolrInputDocument doc) {
        String uri = individual.getURI();

        float beta;
        if (hasBeta(uri)) {
            beta = getBeta(uri);
        } else {
            beta = calculateBeta(uri);
            setBeta(uri, beta);
        }

        // Both static fields start out as null in IndividualToSolrDocument.
        List<String> superClassNames = IndividualToSolrDocument.superClassNames;
        boolean isPerson = superClassNames != null && superClassNames.contains("Person");

        String[] adjInfo = getAdjacentNodes(uri, isPerson);
        StringBuffer info = new StringBuffer();
        info.append(adjInfo[0]);
        StringBuffer addUri = IndividualToSolrDocument.addUri;
        if (addUri != null) {
            info.append(addUri.toString());
        }

        // Use the returned value rather than re-reading the mutable field.
        float docPhi = calculatePhi(info);

        for (String term : fieldsToAddBetaTo) {
            SolrInputField f = doc.getField(term);
            // getField returns null for a field that was never added.
            if (f != null) {
                f.setBoost(beta + docPhi + IndividualToSolrDocument.NAME_BOOST);
            }
        }

        for (String term : fieldsToMultiplyBetaBy) {
            SolrInputField f = doc.getField(term);
            if (f != null) {
                f.setBoost(beta * docPhi * IndividualToSolrDocument.ALL_TEXT_BOOST);
            }
        }

        doc.setDocumentBoost(beta * docPhi * IndividualToSolrDocument.ALL_TEXT_BOOST);
    }
}

View file

@ -1,22 +0,0 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import org.apache.solr.common.SolrInputDocument;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
/**
 * Placeholder DocumentModifier for a phi-based adjustment. It keeps a
 * reference to a CalculateBeta, but modifyDocument is currently empty, so
 * documents pass through unchanged.
 */
public class CalculatePhi implements DocumentModifier{
// Beta calculator supplied at construction; nothing in this class reads it yet.
CalculateBeta betas;
// maybe Phi needs Beta?
/**
 * @param betas the beta calculator to keep a reference to
 */
public CalculatePhi(CalculateBeta betas){
this.betas = betas;
}
/**
 * Does nothing; neither argument is read or modified.
 */
@Override
public void modifyDocument(Individual individual, SolrInputDocument doc) {
// TODO Auto-generated method stub
}
}

View file

@ -53,19 +53,18 @@ public class IndividualToSolrDocument implements Obj2DocIface {
private SearchQueryHandler searchQueryHandler; private SearchQueryHandler searchQueryHandler;
public static ArrayList<String> superClassNames = null;
public static StringBuffer addUri = null;
private List<DocumentModifier> documentModifiers = new ArrayList<DocumentModifier>(); private List<DocumentModifier> documentModifiers = new ArrayList<DocumentModifier>();
private static List<String> contextNodeClassNames = new ArrayList<String>(); private static List<String> contextNodeClassNames = new ArrayList<String>();
public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch, public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch,
IndividualProhibitedFromSearch individualProhibitedFromSearch, IndividualProhibitedFromSearch individualProhibitedFromSearch,
SearchQueryHandler searchQueryHandler){ SearchQueryHandler searchQueryHandler){
this.classesProhibitedFromSearch = classesProhibitedFromSearch; this(classesProhibitedFromSearch,individualProhibitedFromSearch,searchQueryHandler,null);
this.individualProhibitedFromSearch = individualProhibitedFromSearch;
this.searchQueryHandler = searchQueryHandler;
fillContextNodes();
} }
public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch, public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch,
@ -118,7 +117,7 @@ public class IndividualToSolrDocument implements Obj2DocIface {
// Types and classgroups // Types and classgroups
boolean prohibited = false; boolean prohibited = false;
List<VClass> vclasses = ent.getVClasses(false); List<VClass> vclasses = ent.getVClasses(false);
ArrayList<String> superClassNames = new ArrayList<String>(); superClassNames = new ArrayList<String>();
String superLclName = null; String superLclName = null;
long tClassgroup = System.currentTimeMillis(); long tClassgroup = System.currentTimeMillis();
for(VClass clz : vclasses){ for(VClass clz : vclasses){
@ -193,7 +192,7 @@ public class IndividualToSolrDocument implements Obj2DocIface {
StringBuffer objectNames = new StringBuffer(); StringBuffer objectNames = new StringBuffer();
objectNames.append(""); objectNames.append("");
String t=null; String t=null;
StringBuffer addUri = new StringBuffer(); addUri = new StringBuffer();
addUri.append(""); addUri.append("");
List<ObjectPropertyStatement> objectPropertyStatements = ent.getObjectPropertyStatements(); List<ObjectPropertyStatement> objectPropertyStatements = ent.getObjectPropertyStatements();
if (objectPropertyStatements != null) { if (objectPropertyStatements != null) {
@ -214,21 +213,17 @@ public class IndividualToSolrDocument implements Obj2DocIface {
} }
// adding PHI value // adding PHI value
boolean isPerson = (superClassNames.contains("Person")) ? true : false ;
String adjInfo[] = searchQueryHandler.getAdjacentNodes(uri,isPerson);
StringBuffer info = new StringBuffer();
info.append(adjInfo[0]);
info.append(addUri.toString());
//doc.addField(term.ADJACENT_NODES,info.toString()); // adding adjacent nodes //doc.addField(term.ADJACENT_NODES,info.toString()); // adding adjacent nodes
float phi = calculatePHI(info);
//doc.addField(term.PHI, phi); // adding phi value //doc.addField(term.PHI, phi); // adding phi value
doc.addField(term.NAME_RAW, value, NAME_BOOST+phi); doc.addField(term.NAME_RAW, value, NAME_BOOST);
doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST+phi); doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST);
doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST+phi); doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST);
doc.addField(term.NAME_STEMMED, value, NAME_BOOST+phi); doc.addField(term.NAME_STEMMED, value, NAME_BOOST);
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST); doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
long tContextNodes = System.currentTimeMillis(); long tContextNodes = System.currentTimeMillis();
@ -237,23 +232,6 @@ public class IndividualToSolrDocument implements Obj2DocIface {
StringBuffer targetInfo = new StringBuffer(); StringBuffer targetInfo = new StringBuffer();
targetInfo.append(""); targetInfo.append("");
if(superClassNames.contains("Agent")){
objectNames.append(" ");
objectNames.append(searchQueryHandler.getPropertiesAssociatedWithEducationalTraining(ent.getURI()));
objectNames.append(" ");
objectNames.append(searchQueryHandler.getPropertiesAssociatedWithRole(ent.getURI()));
objectNames.append(" ");
objectNames.append(searchQueryHandler.getPropertiesAssociatedWithPosition(ent.getURI()));
objectNames.append(" ");
objectNames.append(searchQueryHandler.getPropertiesAssociatedWithRelationship(ent.getURI()));
objectNames.append(" ");
objectNames.append(searchQueryHandler.getPropertiesAssociatedWithAwardReceipt(ent.getURI()));
}
if(superClassNames.contains("InformationResource")){
targetInfo.append(" ");
targetInfo.append(searchQueryHandler.getPropertiesAssociatedWithInformationResource(ent.getURI()));
}
doc.addField(term.targetInfo, targetInfo.toString() + adjInfo[1]); doc.addField(term.targetInfo, targetInfo.toString() + adjInfo[1]);
@ -263,8 +241,8 @@ public class IndividualToSolrDocument implements Obj2DocIface {
long tMoniker = System.currentTimeMillis(); long tMoniker = System.currentTimeMillis();
//boost for entity //boost for entity
// if(ent.getSearchBoost() != null && ent.getSearchBoost() != 0) if(ent.getSearchBoost() != null && ent.getSearchBoost() != 0)
// doc.setDocumentBoost(ent.getSearchBoost()); doc.setDocumentBoost(ent.getSearchBoost());
//thumbnail //thumbnail
try{ try{
@ -314,10 +292,9 @@ public class IndividualToSolrDocument implements Obj2DocIface {
log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements)); log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements));
String alltext = allTextValue.toString(); String alltext = allTextValue.toString();
doc.addField(term.ALLTEXT, alltext, 2.5F*phi); doc.addField(term.ALLTEXT, alltext, ALL_TEXT_BOOST);
doc.addField(term.ALLTEXTUNSTEMMED, alltext, 2.5F*phi); doc.addField(term.ALLTEXTUNSTEMMED, alltext, ALL_TEXT_BOOST);
doc.addField(term.ALLTEXT_PHONETIC, alltext, PHONETIC_BOOST); doc.addField(term.ALLTEXT_PHONETIC, alltext, PHONETIC_BOOST);
doc.setDocumentBoost(2.5F*phi);
//run the document modifiers //run the document modifiers
if( documentModifiers != null ){ if( documentModifiers != null ){
@ -334,30 +311,7 @@ public class IndividualToSolrDocument implements Obj2DocIface {
* Method for calculation of PHI for a doc. * Method for calculation of PHI for a doc.
*/ */
/**
 * Calculates PHI from a space-separated list of adjacent node URIs.
 *
 * The betas of all listed nodes are added to a starting value of 0.1 and the
 * total is divided by the number of nodes. Betas missing from {@code betas}
 * are obtained from the search query handler and stored there.
 *
 * @param adjNodes space-separated URIs of the adjacent nodes
 * @return the averaged value, or 1 when the list holds no URIs
 */
public float calculatePHI(StringBuffer adjNodes){
    float total = 0.1F;
    int count = 0;

    StringTokenizer tokens = new StringTokenizer(adjNodes.toString(), " ");
    while (tokens.hasMoreTokens()) {
        count++;
        String nodeUri = tokens.nextToken();

        float nodeBeta;
        if (!betas.containsKey(nodeUri)) {
            // Not calculated yet: query for it and remember the result.
            nodeBeta = searchQueryHandler.calculateBeta(nodeUri);
            betas.put(nodeUri, nodeBeta);
        } else {
            nodeBeta = betas.get(nodeUri);
        }
        total += nodeBeta;
    }

    if (count == 0) {
        return 1;
    }
    return total / count;
}
// public IndividualToSolrDocument(Entity2LuceneDoc e2d){ // public IndividualToSolrDocument(Entity2LuceneDoc e2d){
//// entityToLucene = e2d; //// entityToLucene = e2d;
@ -423,6 +377,7 @@ public class IndividualToSolrDocument implements Obj2DocIface {
public static float NAME_BOOST = 2.0F; public static float NAME_BOOST = 2.0F;
public static float ALL_TEXT_BOOST = 2.5F;
public static float PHONETIC_BOOST = 0.1F; public static float PHONETIC_BOOST = 0.1F;