diff --git a/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculateParameters.java b/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculateParameters.java new file mode 100644 index 00000000..432c3573 --- /dev/null +++ b/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculateParameters.java @@ -0,0 +1,298 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.search.solr; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.solr.common.SolrInputDocument; + +import com.hp.hpl.jena.query.Dataset; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecution; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.QuerySolutionMap; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.Syntax; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.ResourceFactory; +import com.hp.hpl.jena.shared.Lock; + +import edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.DocumentModifier; + + +public class CalculateParameters implements DocumentModifier { + + private boolean shutdown = false; + private Dataset dataset; + // public static int totalInd=1; + + private static final String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix core: " + + " prefix foaf: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix obo: " + + " prefix bibo: "; + + private static final String betaQuery = prefix + " SELECT count(distinct ?inLinks) " + + " WHERE { " + + " ?uri rdf:type owl:Thing . " + + " ?inLinks ?prop ?uri . " + + " } "; + + private static final String totalCountQuery = prefix + " SELECT count(distinct ?ind) " + + " WHERE { " + + " ?ind rdf:type owl:Thing . " + + " } "; + + private static Log log = LogFactory.getLog(CalculateParameters.class); + + public CalculateParameters(Dataset dataset){ + this.dataset =dataset; + // new Thread(new TotalInd(this.dataset,totalCountQuery)).start(); + } + + public CalculateParameters(){ + super(); + } + + public float calculateBeta(String uri){ + float beta=0; + int Conn=0; + Query query; + QuerySolutionMap initialBinding = new QuerySolutionMap(); + QuerySolution soln = null; + Resource uriResource = ResourceFactory.createResource(uri); + initialBinding.add("uri", uriResource); + dataset.getLock().enterCriticalSection(Lock.READ); + QueryExecution qexec=null; + try{ + query = QueryFactory.create(betaQuery,Syntax.syntaxARQ); + qexec = QueryExecutionFactory.create(query,dataset,initialBinding); + ResultSet results = qexec.execSelect(); + List resultVars = results.getResultVars(); + if(resultVars!=null && resultVars.size()!=0){ + soln = results.next(); + Conn = Integer.parseInt(soln.getLiteral(resultVars.get(0)).getLexicalForm()); + } + }catch(Throwable t){ + if( ! shutdown ) + log.error(t,t); + }finally{ + if( qexec != null ) + qexec.close(); + dataset.getLock().leaveCriticalSection(); + } + + beta = (float)Conn; + //beta *= 100; + beta += 1; + + // sigmoid function to keep beta between 0 to 1; + + beta = (float) (1 / ( 1 + Math.pow(Math.E,(-beta)))); + + if(beta > 1) + log.info("Beta higher than 1 : " + beta); + else if(beta <= 0) + log.info("Beta lower < = 0 : " + beta); + return beta; + } + + + public String[] getAdjacentNodes(String uri){ + + List queryList = new ArrayList(); + Set adjacentNodes = new HashSet(); + Set coauthorNames = new HashSet(); + String[] info = new String[]{"",""}; + StringBuffer adjacentNodesConcat = new StringBuffer(); + StringBuffer coauthorBuff = new StringBuffer(); + adjacentNodesConcat.append(""); + coauthorBuff.append(""); + + queryList.add(prefix + + " SELECT ?adjobj (str(?adjobjLabel) as ?coauthor) " + + " WHERE { " + + " ?uri rdf:type . " + + " ?uri ?prop ?obj . " + + " ?obj rdf:type . " + + " ?obj ?prop2 ?obj2 . " + + " ?obj2 rdf:type obo:IAO_0000030 . " + + " ?obj2 ?prop3 ?obj3 . " + + " ?obj3 rdf:type . " + + " ?obj3 ?prop4 ?adjobj . " + + " ?adjobj rdfs:label ?adjobjLabel . " + + " ?adjobj rdf:type . " + + + " FILTER (?prop !=rdf:type) . " + + " FILTER (?prop2!=rdf:type) . " + + " FILTER (?prop3!=rdf:type) . " + + " FILTER (?prop4!=rdf:type) . " + + " FILTER (?adjobj != ?uri) . " + + "}"); + + queryList.add(prefix + + " SELECT ?adjobj " + + " WHERE{ " + + + " ?uri rdf:type foaf:Agent . " + + " ?uri ?prop ?obj . " + + " ?obj ?prop2 ?adjobj . " + + + + " FILTER (?prop !=rdf:type) . " + + " FILTER isURI(?obj) . " + + + " FILTER (?prop2!=rdf:type) . " + + " FILTER (?adjobj != ?uri) . " + + " FILTER isURI(?adjobj) . " + + + " { ?adjobj rdf:type . } " + + " UNION " + + " { ?adjobj rdf:type . } " + + " UNION " + + " { ?adjobj rdf:type obo:IAO_0000030 . } " + + " UNION " + + " { ?adjobj rdf:type . } ." + + "}"); + + Query query; + + QuerySolution soln; + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + Iterator queryItr = queryList.iterator(); + + dataset.getLock().enterCriticalSection(Lock.READ); + Resource adjacentIndividual = null; + RDFNode coauthor = null; + try{ + while(queryItr.hasNext()){ + /*if(!isPerson){ + queryItr.next(); // we don't want first query to execute if the ind is not a person. + }*/ + query = QueryFactory.create(queryItr.next(),Syntax.syntaxARQ); + QueryExecution qexec = QueryExecutionFactory.create(query,dataset,initialBinding); + try{ + ResultSet results = qexec.execSelect(); + while(results.hasNext()){ + soln = results.nextSolution(); + + adjacentIndividual = (Resource)soln.get("adjobj"); + if(adjacentIndividual!=null){ + adjacentNodes.add(adjacentIndividual.getURI()); + } + + coauthor = soln.get("coauthor"); + if(coauthor!=null){ + coauthorNames.add(" co-authors " + coauthor.toString() + " co-authors "); + } + } + }catch(Exception e){ + if( ! shutdown ) + log.error("Error found in getAdjacentNodes method of SearchQueryHandler"); + }finally{ + qexec.close(); + } + } + queryList = null; + Iterator itr = adjacentNodes.iterator(); + while(itr.hasNext()){ + adjacentNodesConcat.append(itr.next() + " "); + } + + info[0] = adjacentNodesConcat.toString(); + + itr = coauthorNames.iterator(); + while(itr.hasNext()){ + coauthorBuff.append(itr.next()); + } + + info[1] = coauthorBuff.toString(); + + } + catch(Throwable t){ + if( ! shutdown ) + log.error(t,t); + }finally{ + dataset.getLock().leaveCriticalSection(); + adjacentNodes = null; + adjacentNodesConcat = null; + coauthorBuff = null; + } + return info; + } + + @Override + public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri) { + // TODO Auto-generated method stub + // calculate beta value. + log.debug("Parameter calculation starts.."); + float beta = calculateBeta(individual.getURI()); + doc.addField(VitroSearchTermNames.BETA, beta); + doc.setDocumentBoost(beta + doc.getDocumentBoost() ); + log.debug("Parameter calculation is done"); + } + + + public void shutdown(){ + shutdown=true; + } +} + +class TotalInd implements Runnable{ + private Dataset dataset; + private String totalCountQuery; + private static Log log = LogFactory.getLog(TotalInd.class); + + public TotalInd(Dataset dataset,String totalCountQuery){ + this.dataset = dataset; + this.totalCountQuery = totalCountQuery; + + } + public void run(){ + int totalInd=0; + Query query; + QuerySolution soln = null; + dataset.getLock().enterCriticalSection(Lock.READ); + QueryExecution qexec = null; + + try{ + query = QueryFactory.create(totalCountQuery,Syntax.syntaxARQ); + qexec = QueryExecutionFactory.create(query,dataset); + ResultSet results = qexec.execSelect(); + List resultVars = results.getResultVars(); + + if(resultVars!=null && resultVars.size()!=0){ + soln = results.next(); + totalInd = Integer.parseInt(soln.getLiteral(resultVars.get(0)).getLexicalForm()); + } + //CalculateParameters.totalInd = totalInd; + //log.info("Total number of individuals in the system are : " + CalculateParameters.totalInd); + }catch(Throwable t){ + log.error(t,t); + }finally{ + if( qexec != null ) + qexec.close(); + dataset.getLock().leaveCriticalSection(); + } + + } +} diff --git a/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoDocumentModifiers.java b/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoDocumentModifiers.java index 927ae69d..b27bfbad 100644 --- a/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoDocumentModifiers.java +++ b/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoDocumentModifiers.java @@ -13,7 +13,6 @@ import com.hp.hpl.jena.query.DatasetFactory; import edu.cornell.mannlib.vitro.webapp.dao.ModelAccess; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils; -import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.CalculateParameters; import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.DocumentModifier; import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ExcludeBasedOnNamespace; import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.SearchIndexExcluder;