[VIVO-1031] Use streaming result set for co-visualisations

grahamtriggs 2015-10-19 15:51:57 +01:00
parent 208f103629
commit 8fc40fcf98
2 changed files with 457 additions and 485 deletions
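
For context: this commit replaces the old pattern of buffering the whole SPARQL SELECT result (fetch an InputStream, build a ResultSet with ResultSetFactory.fromJSON, iterate it, close the stream) with a streaming consumer passed to RDFService.sparqlSelectQuery; each row is handled in processQuerySolution and any post-processing happens in endProcessing. The following is a minimal sketch of that pattern, assuming the Vitro RDFService / ResultSetConsumer API as it appears in the diff below; the class names, the fetchLabels helper and the "label" query variable are illustrative only, not part of this commit.

    // Minimal sketch of the streaming pattern introduced by this commit.
    import java.util.ArrayList;
    import java.util.List;

    import com.hp.hpl.jena.query.QuerySolution;

    import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
    import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
    import edu.cornell.mannlib.vitro.webapp.rdfservice.ResultSetConsumer;

    public class StreamingSelectExample {

        /** Accumulates one value per solution instead of materialising the whole result set. */
        private static class LabelConsumer extends ResultSetConsumer {
            private final List<String> labels = new ArrayList<String>();

            @Override
            protected void processQuerySolution(QuerySolution qs) {
                // Called once per row as the RDFService streams the SELECT results.
                if (qs.get("label") != null) {
                    labels.add(qs.get("label").toString());
                }
            }

            List<String> getLabels() {
                return labels;
            }
        }

        static List<String> fetchLabels(RDFService rdfService, String selectQuery)
                throws RDFServiceException {
            LabelConsumer consumer = new LabelConsumer();
            // No InputStream / ResultSetFactory.fromJSON(...) buffering and no explicit close():
            // rows are pushed to the consumer as they are produced.
            rdfService.sparqlSelectQuery(selectQuery, consumer);
            return consumer.getLabels();
        }
    }

The design choice is the same in both changed files: holding the entire JSON result set in memory (and manually closing the stream) is traded for a push-based consumer, which keeps memory use flat for large co-authorship and co-investigator graphs.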

Changed file: CoAuthorshipQueryRunner.java

@@ -16,6 +16,8 @@ import java.util.concurrent.ConcurrentHashMap;
 import com.hp.hpl.jena.query.ResultSetFactory;
 import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
 import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
+import edu.cornell.mannlib.vitro.webapp.rdfservice.ResultSetConsumer;
+import net.sf.jga.algorithms.Unique;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.jena.iri.IRI;
@@ -23,7 +25,6 @@ import org.apache.jena.iri.IRIFactory;
 import org.apache.jena.iri.Violation;
 import com.hp.hpl.jena.query.QuerySolution;
-import com.hp.hpl.jena.query.ResultSet;
 import com.hp.hpl.jena.query.Syntax;
 import com.hp.hpl.jena.rdf.model.RDFNode;
@@ -58,10 +59,6 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
     private Log log;
 
-    private UniqueIDGenerator nodeIDGenerator;
-    private UniqueIDGenerator edgeIDGenerator;
-
     public CoAuthorshipQueryRunner(String egoURI,
             RDFService rdfService, Log log) {
@@ -69,18 +66,13 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
         this.rdfService = rdfService;
         this.log = log;
-
-        this.nodeIDGenerator = new UniqueIDGenerator();
-        this.edgeIDGenerator = new UniqueIDGenerator();
     }
 
-    private CollaborationData createQueryResult(ResultSet resultSet) {
+    private static class QueryResultConsumer extends ResultSetConsumer {
         Set<Collaborator> nodes = new HashSet<Collaborator>();
 
         Map<String, Activity> biboDocumentURLToVO = new HashMap<String, Activity>();
-        Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors =
-                new HashMap<String, Set<Collaborator>>();
+        Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors = new HashMap<String, Set<Collaborator>>();
         Map<String, Collaborator> nodeURLToVO = new HashMap<String, Collaborator>();
         Map<String, Collaboration> edgeUniqueIdentifierToVO = new HashMap<String, Collaboration>();
@@ -88,13 +80,15 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
         Set<Collaboration> edges = new HashSet<Collaboration>();
 
-        while (resultSet.hasNext()) {
-            QuerySolution solution = resultSet.nextSolution();
+        private UniqueIDGenerator nodeIDGenerator = new UniqueIDGenerator();
+        private UniqueIDGenerator edgeIDGenerator = new UniqueIDGenerator();
+
+        @Override
+        protected void processQuerySolution(QuerySolution qs) {
             /*
              * We only want to create only ONE ego node.
             * */
-            RDFNode egoAuthorURLNode = solution.get(QueryFieldLabels.AUTHOR_URL);
+            RDFNode egoAuthorURLNode = qs.get(QueryFieldLabels.AUTHOR_URL);
             if (nodeURLToVO.containsKey(egoAuthorURLNode.toString())) {
                 egoNode = nodeURLToVO.get(egoAuthorURLNode.toString());
@@ -105,19 +99,19 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
                 nodes.add(egoNode);
                 nodeURLToVO.put(egoAuthorURLNode.toString(), egoNode);
 
-                RDFNode authorLabelNode = solution.get(QueryFieldLabels.AUTHOR_LABEL);
+                RDFNode authorLabelNode = qs.get(QueryFieldLabels.AUTHOR_LABEL);
                 if (authorLabelNode != null) {
                     egoNode.setCollaboratorName(authorLabelNode.toString());
                 }
             }
 
-            RDFNode documentNode = solution.get(QueryFieldLabels.DOCUMENT_URL);
+            RDFNode documentNode = qs.get(QueryFieldLabels.DOCUMENT_URL);
             Activity biboDocument;
 
             if (biboDocumentURLToVO.containsKey(documentNode.toString())) {
                 biboDocument = biboDocumentURLToVO.get(documentNode.toString());
             } else {
-                biboDocument = createDocumentVO(solution, documentNode.toString());
+                biboDocument = createDocumentVO(qs, documentNode.toString());
                 biboDocumentURLToVO.put(documentNode.toString(), biboDocument);
             }
@@ -128,14 +122,14 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
             * we do not want a co-author node or Collaboration if the publication has only one
             * author and that happens to be the ego.
             * */
-            if (solution.get(QueryFieldLabels.AUTHOR_URL).toString().equalsIgnoreCase(
-                    solution.get(QueryFieldLabels.CO_AUTHOR_URL).toString())) {
-                continue;
+            if (qs.get(QueryFieldLabels.AUTHOR_URL).toString().equalsIgnoreCase(
+                    qs.get(QueryFieldLabels.CO_AUTHOR_URL).toString())) {
+                return;
             }
 
             Collaborator coAuthorNode;
-            RDFNode coAuthorURLNode = solution.get(QueryFieldLabels.CO_AUTHOR_URL);
+            RDFNode coAuthorURLNode = qs.get(QueryFieldLabels.CO_AUTHOR_URL);
             if (nodeURLToVO.containsKey(coAuthorURLNode.toString())) {
                 coAuthorNode = nodeURLToVO.get(coAuthorURLNode.toString());
@@ -146,7 +140,7 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
                 nodes.add(coAuthorNode);
                 nodeURLToVO.put(coAuthorURLNode.toString(), coAuthorNode);
 
-                RDFNode coAuthorLabelNode = solution.get(QueryFieldLabels.CO_AUTHOR_LABEL);
+                RDFNode coAuthorLabelNode = qs.get(QueryFieldLabels.CO_AUTHOR_LABEL);
                 if (coAuthorLabelNode != null) {
                     coAuthorNode.setCollaboratorName(coAuthorLabelNode.toString());
                 }
@@ -158,11 +152,11 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
             if (biboDocumentURLToCoAuthors.containsKey(biboDocument.getActivityURI())) {
                 coAuthorsForCurrentBiboDocument = biboDocumentURLToCoAuthors
                         .get(biboDocument.getActivityURI());
             } else {
                 coAuthorsForCurrentBiboDocument = new HashSet<Collaborator>();
                 biboDocumentURLToCoAuthors.put(biboDocument.getActivityURI(),
                         coAuthorsForCurrentBiboDocument);
             }
 
             coAuthorsForCurrentBiboDocument.add(coAuthorNode);
@@ -183,70 +177,173 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
                 edges.add(egoCoAuthorEdge);
                 edgeUniqueIdentifierToVO.put(
                         getEdgeUniqueIdentifier(egoNode.getCollaboratorID(),
                                 coAuthorNode.getCollaboratorID()),
                         egoCoAuthorEdge);
             }
-        }
-
-        /*
-         * This method takes out all the authors & edges between authors that belong to documents
-         * that have more than 100 authors. We conjecture that these papers do not provide much
-         * insight. However, we have left the documents be.
-         *
-         * This method side-effects "nodes" & "edges".
-         * */
-        removeLowQualityNodesAndEdges(nodes,
-                biboDocumentURLToVO,
-                biboDocumentURLToCoAuthors,
-                edges);
-
-        /*
-         * We need to create edges between 2 co-authors. E.g. On a paper there were 3 authors
-         * ego, A & B then we have already created edges like,
-         * ego - A
-         * ego - B
-         * The below sub-routine will take care of,
-         * A - B
-         *
-         * We are side-effecting "edges" here. The only reason to do this is because we are adding
-         * edges en masse for all the co-authors on all the publications considered so far. The
-         * other reason being we dont want to compare against 2 sets of edges (edges created before
-         * & co-author edges created during the course of this method) when we are creating a new
-         * Collaboration.
-         * */
-        createCoAuthorEdges(biboDocumentURLToVO,
-                biboDocumentURLToCoAuthors,
-                edges,
-                edgeUniqueIdentifierToVO);
-
-        return new CoAuthorshipData(egoNode, nodes, edges);
-    }
-
-    private void removeLowQualityNodesAndEdges(
-            Set<Collaborator> nodes,
-            Map<String, Activity> biboDocumentURLToVO,
-            Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors,
-            Set<Collaboration> edges) {
-        Set<Collaborator> nodesToBeRemoved = new HashSet<Collaborator>();
-        for (Map.Entry<String, Set<Collaborator>> currentBiboDocumentEntry
+        }
+
+        @Override
+        protected void endProcessing() {
+            /*
+             * This method takes out all the authors & edges between authors that belong to documents
+             * that have more than 100 authors. We conjecture that these papers do not provide much
+             * insight. However, we have left the documents be.
+             *
+             * This method side-effects "nodes" & "edges".
+             * */
+            removeLowQualityNodesAndEdges(nodes,
+                    biboDocumentURLToVO,
+                    biboDocumentURLToCoAuthors,
+                    edges);
+
+            /*
+             * We need to create edges between 2 co-authors. E.g. On a paper there were 3 authors
+             * ego, A & B then we have already created edges like,
+             * ego - A
+             * ego - B
+             * The below sub-routine will take care of,
+             * A - B
+             *
+             * We are side-effecting "edges" here. The only reason to do this is because we are adding
+             * edges en masse for all the co-authors on all the publications considered so far. The
+             * other reason being we dont want to compare against 2 sets of edges (edges created before
+             * & co-author edges created during the course of this method) when we are creating a new
+             * Collaboration.
+             * */
+            createCoAuthorEdges(biboDocumentURLToVO,
+                    biboDocumentURLToCoAuthors,
+                    edges,
+                    edgeUniqueIdentifierToVO);
+        }
+
+        public CollaborationData getCollaborationData() {
+            return new CoAuthorshipData(egoNode, nodes, edges);
+        }
+
+        private Activity createDocumentVO(QuerySolution solution, String documentURL) {
+            Activity biboDocument = new Activity(documentURL);
+
+            RDFNode publicationDateNode = solution.get(QueryFieldLabels.DOCUMENT_PUBLICATION_DATE);
+            if (publicationDateNode != null) {
+                biboDocument.setActivityDate(publicationDateNode.toString());
+            }
+
+            return biboDocument;
+        }
+
+        private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
+            String separator = "*";
+            if (nodeID1 < nodeID2) {
+                return nodeID1 + separator + nodeID2;
+            } else {
+                return nodeID2 + separator + nodeID1;
+            }
+        }
+
+        private void createCoAuthorEdges(
+                Map<String, Activity> biboDocumentURLToVO,
+                Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors, Set<Collaboration> edges,
+                Map<String, Collaboration> edgeUniqueIdentifierToVO) {
+            for (Map.Entry<String, Set<Collaborator>> currentBiboDocumentEntry
+                    : biboDocumentURLToCoAuthors.entrySet()) {
+                /*
+                 * If there was only one co-author (other than ego) then we dont have to create any
+                 * edges. so the below condition will take care of that.
+                 *
+                 * We are restricting edges between co-author if a particular document has more than
+                 * 100 co-authors. Our conjecture is that such edges do not provide any good insight
+                 * & causes unnecessary computations causing the server to time-out.
+                 * */
+                if (currentBiboDocumentEntry.getValue().size() > 1
+                        && currentBiboDocumentEntry.getValue().size()
+                                <= MAX_AUTHORS_PER_PAPER_ALLOWED) {
+                    Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
+                    /*
+                     * In order to leverage the nested "for loop" for making edges between all the
+                     * co-authors we need to create a list out of the set first.
+                     * */
+                    List<Collaborator> coAuthorNodes =
+                            new ArrayList<Collaborator>(currentBiboDocumentEntry.getValue());
+                    Collections.sort(coAuthorNodes, new CollaboratorComparator());
+                    int numOfCoAuthors = coAuthorNodes.size();
+                    for (int ii = 0; ii < numOfCoAuthors - 1; ii++) {
+                        for (int jj = ii + 1; jj < numOfCoAuthors; jj++) {
+                            Collaborator coAuthor1 = coAuthorNodes.get(ii);
+                            Collaborator coAuthor2 = coAuthorNodes.get(jj);
+                            Collaboration coAuthor1_2Edge = getExistingEdge(coAuthor1,
+                                    coAuthor2,
+                                    edgeUniqueIdentifierToVO);
+                            Activity currentBiboDocument = biboDocumentURLToVO
+                                    .get(currentBiboDocumentEntry
+                                            .getKey());
+                            if (coAuthor1_2Edge != null) {
+                                coAuthor1_2Edge.addActivity(currentBiboDocument);
+                            } else {
+                                coAuthor1_2Edge = new Collaboration(coAuthor1,
+                                        coAuthor2,
+                                        currentBiboDocument,
+                                        edgeIDGenerator);
+                                newlyAddedEdges.add(coAuthor1_2Edge);
+                                edgeUniqueIdentifierToVO.put(
+                                        getEdgeUniqueIdentifier(coAuthor1.getCollaboratorID(),
+                                                coAuthor2.getCollaboratorID()),
+                                        coAuthor1_2Edge);
+                            }
+                        }
+                    }
+                    edges.addAll(newlyAddedEdges);
+                }
+            }
+        }
+
+        private Collaboration getExistingEdge(
+                Collaborator collaboratingNode1,
+                Collaborator collaboratingNode2,
+                Map<String, Collaboration> edgeUniqueIdentifierToVO) {
+            String edgeUniqueIdentifier = getEdgeUniqueIdentifier(
+                    collaboratingNode1.getCollaboratorID(),
+                    collaboratingNode2.getCollaboratorID());
+            return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier);
+        }
+
+        private void removeLowQualityNodesAndEdges(
+                Set<Collaborator> nodes,
+                Map<String, Activity> biboDocumentURLToVO,
+                Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors,
+                Set<Collaboration> edges) {
+            Set<Collaborator> nodesToBeRemoved = new HashSet<Collaborator>();
+            for (Map.Entry<String, Set<Collaborator>> currentBiboDocumentEntry
                     : biboDocumentURLToCoAuthors.entrySet()) {
                 if (currentBiboDocumentEntry.getValue().size() > MAX_AUTHORS_PER_PAPER_ALLOWED) {
                     Activity currentBiboDocument = biboDocumentURLToVO
                             .get(currentBiboDocumentEntry.getKey());
                     Set<Collaboration> edgesToBeRemoved = new HashSet<Collaboration>();
                     for (Collaboration currentEdge : edges) {
                         Set<Activity> currentCollaboratorDocuments =
                                 currentEdge.getCollaborationActivities();
                         if (currentCollaboratorDocuments.contains(currentBiboDocument)) {
                             currentCollaboratorDocuments.remove(currentBiboDocument);
@@ -265,114 +362,11 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
                         }
                     }
                 }
-            }
-            nodes.removeAll(nodesToBeRemoved);
-        }
-
-        private void createCoAuthorEdges(
-                Map<String, Activity> biboDocumentURLToVO,
-                Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors, Set<Collaboration> edges,
-                Map<String, Collaboration> edgeUniqueIdentifierToVO) {
-            for (Map.Entry<String, Set<Collaborator>> currentBiboDocumentEntry
-                    : biboDocumentURLToCoAuthors.entrySet()) {
-                /*
-                 * If there was only one co-author (other than ego) then we dont have to create any
-                 * edges. so the below condition will take care of that.
-                 *
-                 * We are restricting edges between co-author if a particular document has more than
-                 * 100 co-authors. Our conjecture is that such edges do not provide any good insight
-                 * & causes unnecessary computations causing the server to time-out.
-                 * */
-                if (currentBiboDocumentEntry.getValue().size() > 1
-                        && currentBiboDocumentEntry.getValue().size()
-                                <= MAX_AUTHORS_PER_PAPER_ALLOWED) {
-                    Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
-                    /*
-                     * In order to leverage the nested "for loop" for making edges between all the
-                     * co-authors we need to create a list out of the set first.
-                     * */
-                    List<Collaborator> coAuthorNodes =
-                            new ArrayList<Collaborator>(currentBiboDocumentEntry.getValue());
-                    Collections.sort(coAuthorNodes, new CollaboratorComparator());
-                    int numOfCoAuthors = coAuthorNodes.size();
-                    for (int ii = 0; ii < numOfCoAuthors - 1; ii++) {
-                        for (int jj = ii + 1; jj < numOfCoAuthors; jj++) {
-                            Collaborator coAuthor1 = coAuthorNodes.get(ii);
-                            Collaborator coAuthor2 = coAuthorNodes.get(jj);
-                            Collaboration coAuthor1_2Edge = getExistingEdge(coAuthor1,
-                                    coAuthor2,
-                                    edgeUniqueIdentifierToVO);
-                            Activity currentBiboDocument = biboDocumentURLToVO
-                                    .get(currentBiboDocumentEntry
-                                            .getKey());
-                            if (coAuthor1_2Edge != null) {
-                                coAuthor1_2Edge.addActivity(currentBiboDocument);
-                            } else {
-                                coAuthor1_2Edge = new Collaboration(coAuthor1,
-                                        coAuthor2,
-                                        currentBiboDocument,
-                                        edgeIDGenerator);
-                                newlyAddedEdges.add(coAuthor1_2Edge);
-                                edgeUniqueIdentifierToVO.put(
-                                        getEdgeUniqueIdentifier(coAuthor1.getCollaboratorID(),
-                                                coAuthor2.getCollaboratorID()),
-                                        coAuthor1_2Edge);
-                            }
-                        }
-                    }
-                    edges.addAll(newlyAddedEdges);
-                }
-            }
-        }
-
-        private Collaboration getExistingEdge(
-                Collaborator collaboratingNode1,
-                Collaborator collaboratingNode2,
-                Map<String, Collaboration> edgeUniqueIdentifierToVO) {
-            String edgeUniqueIdentifier = getEdgeUniqueIdentifier(
-                    collaboratingNode1.getCollaboratorID(),
-                    collaboratingNode2.getCollaboratorID());
-            return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier);
-        }
-
-        private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
-            String separator = "*";
-            if (nodeID1 < nodeID2) {
-                return nodeID1 + separator + nodeID2;
-            } else {
-                return nodeID2 + separator + nodeID1;
-            }
-        }
-
-        private Activity createDocumentVO(QuerySolution solution, String documentURL) {
-            Activity biboDocument = new Activity(documentURL);
-
-            RDFNode publicationDateNode = solution.get(QueryFieldLabels.DOCUMENT_PUBLICATION_DATE);
-            if (publicationDateNode != null) {
-                biboDocument.setActivityDate(publicationDateNode.toString());
-            }
-
-            return biboDocument;
-        }
+            }
+            nodes.removeAll(nodesToBeRemoved);
+        }
+        /* END QUERY RUNNER */
+    }
 
     private String generateEgoCoAuthorshipSparqlQuery(String queryURI) {
@@ -455,19 +449,13 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
             throw new MalformedQueryParametersException("URI parameter is either null or empty.");
         }
 
-        InputStream is = null;
-        ResultSet rs = null;
-
         try {
-            is = rdfService.sparqlSelectQuery(generateEgoCoAuthorshipSparqlQuery(this.egoURI), RDFService.ResultFormat.JSON);
-            rs = ResultSetFactory.fromJSON(is);
-            data = createQueryResult(rs);
+            QueryResultConsumer consumer = new QueryResultConsumer();
+            rdfService.sparqlSelectQuery(generateEgoCoAuthorshipSparqlQuery(this.egoURI), consumer);
+            data = consumer.getCollaborationData();
         } catch (RDFServiceException e) {
             log.error("Unable to execute query", e);
             throw new RuntimeException(e);
-        } finally {
-            if (is != null) {
-                try { is.close(); } catch (Throwable t) { }
-            }
         }
 
         CollaborationDataCacheEntry newEntry = new CollaborationDataCacheEntry();

Changed file: CoPIGrantCountQueryRunner.java

@@ -2,7 +2,6 @@
 package edu.cornell.mannlib.vitro.webapp.visualization.coprincipalinvestigator;
 
-import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Date;
@@ -13,9 +12,9 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 
-import com.hp.hpl.jena.query.ResultSetFactory;
 import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
 import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
+import edu.cornell.mannlib.vitro.webapp.rdfservice.ResultSetConsumer;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -23,14 +22,8 @@ import org.apache.jena.iri.IRI;
 import org.apache.jena.iri.IRIFactory;
 import org.apache.jena.iri.Violation;
 
-import com.hp.hpl.jena.query.Query;
-import com.hp.hpl.jena.query.QueryExecution;
-import com.hp.hpl.jena.query.QueryExecutionFactory;
-import com.hp.hpl.jena.query.QueryFactory;
 import com.hp.hpl.jena.query.QuerySolution;
-import com.hp.hpl.jena.query.ResultSet;
 import com.hp.hpl.jena.query.Syntax;
-import com.hp.hpl.jena.rdf.model.Model;
 import com.hp.hpl.jena.rdf.model.RDFNode;
 
 import edu.cornell.mannlib.vitro.webapp.visualization.collaborationutils.CoInvestigationData;
@@ -60,10 +53,6 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
     private Log log = LogFactory.getLog(CoPIGrantCountQueryRunner.class.getName());
 
-    private UniqueIDGenerator nodeIDGenerator;
-    private UniqueIDGenerator edgeIDGenerator;
-
     private long before, after;
 
     private static final String SPARQL_QUERY_COMMON_OPTIONAL_BLOCK_FOR_ROLE_DATE_TIME = ""
@@ -95,9 +84,6 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
         this.egoURI = egoURI;
         this.rdfService = rdfService;
         // this.log = log;
-
-        this.nodeIDGenerator = new UniqueIDGenerator();
-        this.edgeIDGenerator = new UniqueIDGenerator();
     }
 
     private String generateEgoCoPIquery(String queryURI) {
@@ -294,19 +280,13 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
         before = System.currentTimeMillis();
 
-        InputStream is = null;
-        ResultSet rs = null;
-
         try {
-            is = rdfService.sparqlSelectQuery(generateEgoCoPIquery(this.egoURI), RDFService.ResultFormat.JSON);
-            rs = ResultSetFactory.fromJSON(is);
-            data = createQueryResult(rs);
+            QueryResultConsumer consumer = new QueryResultConsumer();
+            rdfService.sparqlSelectQuery(generateEgoCoPIquery(this.egoURI), consumer);
+            data = consumer.getData();
         } catch (RDFServiceException e) {
             log.error("Unable to execute query", e);
             throw new RuntimeException(e);
-        } finally {
-            if (is != null) {
-                try { is.close(); } catch (Throwable t) { }
-            }
         }
 
         after = System.currentTimeMillis();
@@ -356,34 +336,7 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
         }
     }
 
-    private Collaboration getExistingEdge(
-            Collaborator collaboratingNode1,
-            Collaborator collaboratingNode2,
-            Map<String, Collaboration> edgeUniqueIdentifierToVO) {
-        String edgeUniqueIdentifier = getEdgeUniqueIdentifier(
-                collaboratingNode1.getCollaboratorID(),
-                collaboratingNode2.getCollaboratorID());
-        return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier);
-    }
-
-    private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
-        String separator = "*";
-        if (nodeID1 < nodeID2) {
-            return nodeID1 + separator + nodeID2;
-        } else {
-            return nodeID2 + separator + nodeID1;
-        }
-    }
-
-    private CollaborationData createQueryResult(ResultSet resultSet) {
+    private static class QueryResultConsumer extends ResultSetConsumer {
         Set<Collaborator> nodes = new HashSet<Collaborator>();
 
         Map<String, Activity> grantURLToVO = new HashMap<String, Activity>();
@@ -395,111 +348,116 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
         Set<Collaboration> edges = new HashSet<Collaboration>();
 
-        before = System.currentTimeMillis();
-
-        while (resultSet.hasNext()) {
-            QuerySolution solution = resultSet.nextSolution();
+        private UniqueIDGenerator nodeIDGenerator = new UniqueIDGenerator();
+        private UniqueIDGenerator edgeIDGenerator = new UniqueIDGenerator();
+
+        private Log log = LogFactory.getLog(CoPIGrantCountQueryRunner.class.getName());
+
+        @Override
+        protected void processQuerySolution(QuerySolution qs) {
             /*
             * We only want to create only ONE ego node.
             * */
-            RDFNode egoPIURLNode = solution.get(QueryFieldLabels.PI_URL);
+            RDFNode egoPIURLNode = qs.get(QueryFieldLabels.PI_URL);
             if (nodeURLToVO.containsKey(egoPIURLNode.toString())) {
                 egoNode = nodeURLToVO.get(egoPIURLNode.toString());
             } else {
                 egoNode = new Collaborator(egoPIURLNode.toString(), nodeIDGenerator);
                 nodes.add(egoNode);
                 nodeURLToVO.put(egoPIURLNode.toString(), egoNode);
 
-                RDFNode authorLabelNode = solution.get(QueryFieldLabels.PI_LABEL);
+                RDFNode authorLabelNode = qs.get(QueryFieldLabels.PI_LABEL);
                 if (authorLabelNode != null) {
                     egoNode.setCollaboratorName(authorLabelNode.toString());
                 }
             }
             log.debug("PI: " + egoNode.getIndividualLabel());
 
-            RDFNode grantNode = solution.get(QueryFieldLabels.GRANT_URL);
+            RDFNode grantNode = qs.get(QueryFieldLabels.GRANT_URL);
             Activity grant;
 
             if (grantURLToVO.containsKey(grantNode.toString())) {
                 grant = grantURLToVO.get(grantNode.toString());
             } else {
-                grant = createGrantVO(solution, grantNode.toString());
+                grant = createGrantVO(qs, grantNode.toString());
                 grantURLToVO.put(grantNode.toString(), grant);
             }
 
             egoNode.addActivity(grant);
             log.debug("Adding grant: " + grant.getIndividualLabel());
 
             /*
             * After some discussion we concluded that for the purpose of this visualization
             * we do not want a co-pi node or edge if the grant has only one
             * pi and that happens to be the ego.
             * */
-            if (solution.get(QueryFieldLabels.PI_URL).toString().equalsIgnoreCase(
-                    solution.get(QueryFieldLabels.CO_PI_URL).toString())) {
-                continue;
+            if (qs.get(QueryFieldLabels.PI_URL).toString().equalsIgnoreCase(
+                    qs.get(QueryFieldLabels.CO_PI_URL).toString())) {
+                return;
             }
 
-            Collaborator coPINode;
-
-            RDFNode coPIURLNode = solution.get(QueryFieldLabels.CO_PI_URL);
-
-            if (nodeURLToVO.containsKey(coPIURLNode.toString())) {
-                coPINode = nodeURLToVO.get(coPIURLNode.toString());
-            } else {
-                coPINode = new Collaborator(coPIURLNode.toString(), nodeIDGenerator);
-                nodes.add(coPINode);
-                nodeURLToVO.put(coPIURLNode.toString(), coPINode);
-
-                RDFNode coPILabelNode = solution.get(QueryFieldLabels.CO_PI_LABEL);
-                if (coPILabelNode != null) {
-                    coPINode.setCollaboratorName(coPILabelNode.toString());
-                }
-            }
-
-            log.debug("Adding CO-PI: "+ coPINode.getIndividualLabel());
-            coPINode.addActivity(grant);
-
-            Set<Collaborator> coPIsForCurrentGrant;
-
-            if (grantURLToCoPIs.containsKey(grant.getActivityURI())) {
-                coPIsForCurrentGrant = grantURLToCoPIs.get(grant.getActivityURI());
-            } else {
-                coPIsForCurrentGrant = new HashSet<Collaborator>();
-                grantURLToCoPIs.put(grant.getActivityURI(),
-                        coPIsForCurrentGrant);
-            }
-
-            coPIsForCurrentGrant.add(coPINode);
-            log.debug("Co-PI for current grant : " + coPINode.getIndividualLabel());
-
-            Collaboration egoCoPIEdge =
-                    getExistingEdge(egoNode, coPINode, edgeUniqueIdentifierToVO);
+            Collaborator coPINode;
+
+            RDFNode coPIURLNode = qs.get(QueryFieldLabels.CO_PI_URL);
+
+            if (nodeURLToVO.containsKey(coPIURLNode.toString())) {
+                coPINode = nodeURLToVO.get(coPIURLNode.toString());
+            } else {
+                coPINode = new Collaborator(coPIURLNode.toString(), nodeIDGenerator);
+                nodes.add(coPINode);
+                nodeURLToVO.put(coPIURLNode.toString(), coPINode);
+
+                RDFNode coPILabelNode = qs.get(QueryFieldLabels.CO_PI_LABEL);
+                if (coPILabelNode != null) {
+                    coPINode.setCollaboratorName(coPILabelNode.toString());
+                }
+            }
+
+            log.debug("Adding CO-PI: "+ coPINode.getIndividualLabel());
+            coPINode.addActivity(grant);
+
+            Set<Collaborator> coPIsForCurrentGrant;
+
+            if (grantURLToCoPIs.containsKey(grant.getActivityURI())) {
+                coPIsForCurrentGrant = grantURLToCoPIs.get(grant.getActivityURI());
+            } else {
+                coPIsForCurrentGrant = new HashSet<Collaborator>();
+                grantURLToCoPIs.put(grant.getActivityURI(),
+                        coPIsForCurrentGrant);
+            }
+
+            coPIsForCurrentGrant.add(coPINode);
+            log.debug("Co-PI for current grant : " + coPINode.getIndividualLabel());
+
+            Collaboration egoCoPIEdge =
+                    getExistingEdge(egoNode, coPINode, edgeUniqueIdentifierToVO);
 
             /*
             * If "egoCoPIEdge" is null it means that no edge exists in between the egoNode
             * & current coPINode. Else create a new edge, add it to the edges set & add
             * the collaborator grant to it.
             * */
             if (egoCoPIEdge != null) {
                 egoCoPIEdge.addActivity(grant);
             } else {
                 egoCoPIEdge = new Collaboration(egoNode, coPINode, grant, edgeIDGenerator);
                 edges.add(egoCoPIEdge);
                 edgeUniqueIdentifierToVO.put(
                         getEdgeUniqueIdentifier(egoNode.getCollaboratorID(),
                                 coPINode.getCollaboratorID()),
                         egoCoPIEdge);
             }
         }
 
+        @Override
+        protected void endProcessing() {
+            super.endProcessing();
+
             /*
             * This method takes out all the PIs & edges between PIs that belong to grants
             * that have more than 100 PIs. We conjecture that these grants do not provide much
@@ -507,9 +465,9 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
             * This method side-effects "nodes" & "edges".
             * */
             removeLowQualityNodesAndEdges(nodes,
                     grantURLToVO,
                     grantURLToCoPIs,
                     edges);
 
             /*
             * We need to create edges between 2 co-PIs. E.g. On a grant there were 3 PI
             * ego, A & B then we have already created edges like,
@@ -525,23 +483,22 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
             * a new edge.
             * */
             createCoPIEdges(grantURLToVO,
                     grantURLToCoPIs,
                     edges,
                     edgeUniqueIdentifierToVO);
+        }
 
-        after = System.currentTimeMillis();
-        log.debug("Time taken to iterate through the ResultSet of SELECT queries is in ms: "
-                + (after - before));
-
+        public CollaborationData getData() {
             return new CoInvestigationData(egoNode, nodes, edges);
         }
 
         private void createCoPIEdges(Map<String, Activity> grantURLToVO,
                 Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges,
                 Map<String, Collaboration> edgeUniqueIdentifierToVO) {
             for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
                     : grantURLToCoPIs.entrySet()) {
                 /*
                 * If there was only one co-PI (other than ego) then we dont have to create any
@@ -551,60 +508,60 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
                 * 100 co-PIs. Our conjecture is that such edges do not provide any good insight
                 * & causes unnecessary computations causing the server to time-out.
                 * */
                 if (currentGrantEntry.getValue().size() > 1
                         && currentGrantEntry.getValue().size()
                                 <= MAX_PI_PER_GRANT_ALLOWED) {
                     Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
                     /*
                     * In order to leverage the nested "for loop" for making edges between all the
                     * co-PIs we need to create a list out of the set first.
                     * */
                     List<Collaborator> coPINodes =
                             new ArrayList<Collaborator>(currentGrantEntry.getValue());
                     Collections.sort(coPINodes, new CollaboratorComparator());
                     int numOfCoPIs = coPINodes.size();
                     for (int ii = 0; ii < numOfCoPIs - 1; ii++) {
                         for (int jj = ii + 1; jj < numOfCoPIs; jj++) {
                             Collaborator coPI1 = coPINodes.get(ii);
                             Collaborator coPI2 = coPINodes.get(jj);
                             Collaboration coPI1_2Edge = getExistingEdge(coPI1,
                                     coPI2,
                                     edgeUniqueIdentifierToVO);
                             Activity currentGrant = grantURLToVO.get(currentGrantEntry.getKey());
                             if (coPI1_2Edge != null) {
                                 coPI1_2Edge.addActivity(currentGrant);
                             } else {
                                 coPI1_2Edge = new Collaboration(coPI1,
                                         coPI2,
                                         currentGrant,
                                         edgeIDGenerator);
                                 newlyAddedEdges.add(coPI1_2Edge);
                                 edgeUniqueIdentifierToVO.put(
                                         getEdgeUniqueIdentifier(coPI1.getCollaboratorID(),
                                                 coPI2.getCollaboratorID()),
                                         coPI1_2Edge);
                             }
                         }
                     }
                     edges.addAll(newlyAddedEdges);
                 }
             }
         }
 
         private void removeLowQualityNodesAndEdges(Set<Collaborator> nodes,
                 Map<String, Activity> grantURLToVO,
                 Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges) {
             Set<Collaborator> nodesToBeRemoved = new HashSet<Collaborator>();
             for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
                     : grantURLToCoPIs.entrySet()) {
                 if (currentGrantEntry.getValue().size() > MAX_PI_PER_GRANT_ALLOWED) {
@@ -615,7 +572,7 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
                     for (Collaboration currentEdge : edges) {
                         Set<Activity> currentCollaboratorGrants =
                                 currentEdge.getCollaborationActivities();
                         if (currentCollaboratorGrants.contains(currentGrant)) {
                             currentCollaboratorGrants.remove(currentGrant);
@@ -634,14 +591,14 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
                     }
                 }
             }
         }
         nodes.removeAll(nodesToBeRemoved);
     }
 
     private Activity createGrantVO(QuerySolution solution, String grantURL) {
         Activity grant = new Activity(grantURL);
 
 //        RDFNode grantLabelNode = solution.get(QueryFieldLabels.GRANT_LABEL);
 //        if (grantLabelNode != null) {
@@ -649,17 +606,17 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
 //        }
 
         RDFNode grantStartYear = solution.get(QueryFieldLabels.ROLE_START_DATE);
         if (grantStartYear != null) {
             grant.setActivityDate(grantStartYear.toString());
         } else {
             grantStartYear = solution.get(QueryFieldLabels.GRANT_START_DATE);
             if (grantStartYear != null) {
                 grant.setActivityDate(grantStartYear.toString());
             }
         }
 
         //TODO: Verify that grant end date is not required.
         /*
         RDFNode grantEndDate = solution.get(QueryFieldLabels.ROLE_END_DATE);
         if (grantEndDate != null) {
@@ -672,6 +629,33 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
         }
         */
         return grant;
     }
+
+        private Collaboration getExistingEdge(
+                Collaborator collaboratingNode1,
+                Collaborator collaboratingNode2,
+                Map<String, Collaboration> edgeUniqueIdentifierToVO) {
+            String edgeUniqueIdentifier = getEdgeUniqueIdentifier(
+                    collaboratingNode1.getCollaboratorID(),
+                    collaboratingNode2.getCollaboratorID());
+            return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier);
+        }
+
+        private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
+            String separator = "*";
+            if (nodeID1 < nodeID2) {
+                return nodeID1 + separator + nodeID2;
+            } else {
+                return nodeID2 + separator + nodeID1;
+            }
+        }
+
+        /** END QUERY RESULT CONSUMER **/
+    }
 }