[VIVO-1031] Use streaming result set for co-authorship and co-PI visualisations

This commit is contained in:
grahamtriggs 2015-10-19 15:51:57 +01:00
parent 208f103629
commit 8fc40fcf98
2 changed files with 457 additions and 485 deletions

View file

@ -16,6 +16,8 @@ import java.util.concurrent.ConcurrentHashMap;
import com.hp.hpl.jena.query.ResultSetFactory;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.ResultSetConsumer;
import net.sf.jga.algorithms.Unique;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.jena.iri.IRI;
@ -23,7 +25,6 @@ import org.apache.jena.iri.IRIFactory;
import org.apache.jena.iri.Violation;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.RDFNode;
@ -58,312 +59,168 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
private Log log;
private UniqueIDGenerator nodeIDGenerator;
private UniqueIDGenerator edgeIDGenerator;
/**
 * Creates a query runner for one ego individual.
 *
 * @param egoURI     URI of the ego whose co-authorship query is generated
 * @param rdfService service used to execute the SPARQL select query
 * @param log        logger used for debug and error output
 */
public CoAuthorshipQueryRunner(String egoURI,
        RDFService rdfService, Log log) {

    // Each runner gets its own pair of ID generators: nodes first, then edges.
    this.nodeIDGenerator = new UniqueIDGenerator();
    this.edgeIDGenerator = new UniqueIDGenerator();

    // Collaborators handed in by the caller.
    this.log = log;
    this.rdfService = rdfService;
    this.egoURI = egoURI;
}
private CollaborationData createQueryResult(ResultSet resultSet) {
private static class QueryResultConsumer extends ResultSetConsumer {
Set<Collaborator> nodes = new HashSet<Collaborator>();
Map<String, Activity> biboDocumentURLToVO = new HashMap<String, Activity>();
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors =
new HashMap<String, Set<Collaborator>>();
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors = new HashMap<String, Set<Collaborator>>();
Map<String, Collaborator> nodeURLToVO = new HashMap<String, Collaborator>();
Map<String, Collaboration> edgeUniqueIdentifierToVO = new HashMap<String, Collaboration>();
Collaborator egoNode = null;
Set<Collaboration> edges = new HashSet<Collaboration>();
while (resultSet.hasNext()) {
QuerySolution solution = resultSet.nextSolution();
private UniqueIDGenerator nodeIDGenerator = new UniqueIDGenerator();
private UniqueIDGenerator edgeIDGenerator = new UniqueIDGenerator();
@Override
protected void processQuerySolution(QuerySolution qs) {
/*
* We want to create only ONE ego node.
* */
RDFNode egoAuthorURLNode = solution.get(QueryFieldLabels.AUTHOR_URL);
RDFNode egoAuthorURLNode = qs.get(QueryFieldLabels.AUTHOR_URL);
if (nodeURLToVO.containsKey(egoAuthorURLNode.toString())) {
egoNode = nodeURLToVO.get(egoAuthorURLNode.toString());
} else {
egoNode = new Collaborator(egoAuthorURLNode.toString(), nodeIDGenerator);
nodes.add(egoNode);
nodeURLToVO.put(egoAuthorURLNode.toString(), egoNode);
RDFNode authorLabelNode = solution.get(QueryFieldLabels.AUTHOR_LABEL);
RDFNode authorLabelNode = qs.get(QueryFieldLabels.AUTHOR_LABEL);
if (authorLabelNode != null) {
egoNode.setCollaboratorName(authorLabelNode.toString());
}
}
RDFNode documentNode = solution.get(QueryFieldLabels.DOCUMENT_URL);
RDFNode documentNode = qs.get(QueryFieldLabels.DOCUMENT_URL);
Activity biboDocument;
if (biboDocumentURLToVO.containsKey(documentNode.toString())) {
biboDocument = biboDocumentURLToVO.get(documentNode.toString());
} else {
biboDocument = createDocumentVO(solution, documentNode.toString());
biboDocumentURLToVO.put(documentNode.toString(), biboDocument);
biboDocument = createDocumentVO(qs, documentNode.toString());
biboDocumentURLToVO.put(documentNode.toString(), biboDocument);
}
egoNode.addActivity(biboDocument);
/*
* After some discussion we concluded that for the purpose of this visualization
* we do not want a co-author node or Collaboration if the publication has only one
* author and that happens to be the ego.
* */
if (solution.get(QueryFieldLabels.AUTHOR_URL).toString().equalsIgnoreCase(
solution.get(QueryFieldLabels.CO_AUTHOR_URL).toString())) {
continue;
if (qs.get(QueryFieldLabels.AUTHOR_URL).toString().equalsIgnoreCase(
qs.get(QueryFieldLabels.CO_AUTHOR_URL).toString())) {
return;
}
Collaborator coAuthorNode;
RDFNode coAuthorURLNode = solution.get(QueryFieldLabels.CO_AUTHOR_URL);
RDFNode coAuthorURLNode = qs.get(QueryFieldLabels.CO_AUTHOR_URL);
if (nodeURLToVO.containsKey(coAuthorURLNode.toString())) {
coAuthorNode = nodeURLToVO.get(coAuthorURLNode.toString());
} else {
coAuthorNode = new Collaborator(coAuthorURLNode.toString(), nodeIDGenerator);
nodes.add(coAuthorNode);
nodeURLToVO.put(coAuthorURLNode.toString(), coAuthorNode);
RDFNode coAuthorLabelNode = solution.get(QueryFieldLabels.CO_AUTHOR_LABEL);
RDFNode coAuthorLabelNode = qs.get(QueryFieldLabels.CO_AUTHOR_LABEL);
if (coAuthorLabelNode != null) {
coAuthorNode.setCollaboratorName(coAuthorLabelNode.toString());
}
}
coAuthorNode.addActivity(biboDocument);
Set<Collaborator> coAuthorsForCurrentBiboDocument;
if (biboDocumentURLToCoAuthors.containsKey(biboDocument.getActivityURI())) {
coAuthorsForCurrentBiboDocument = biboDocumentURLToCoAuthors
.get(biboDocument.getActivityURI());
.get(biboDocument.getActivityURI());
} else {
coAuthorsForCurrentBiboDocument = new HashSet<Collaborator>();
biboDocumentURLToCoAuthors.put(biboDocument.getActivityURI(),
coAuthorsForCurrentBiboDocument);
biboDocumentURLToCoAuthors.put(biboDocument.getActivityURI(),
coAuthorsForCurrentBiboDocument);
}
coAuthorsForCurrentBiboDocument.add(coAuthorNode);
Collaboration egoCoAuthorEdge =
Collaboration egoCoAuthorEdge =
getExistingEdge(egoNode, coAuthorNode, edgeUniqueIdentifierToVO);
/*
* If "egoCoAuthorEdge" is null it means that no Collaboration exists in between the
* egoNode & current coAuthorNode. Else create a new Collaboration, add it to the edges
* If "egoCoAuthorEdge" is null it means that no Collaboration exists in between the
* egoNode & current coAuthorNode. Else create a new Collaboration, add it to the edges
* set & add the collaborator document to it.
* */
if (egoCoAuthorEdge != null) {
egoCoAuthorEdge.addActivity(biboDocument);
} else {
egoCoAuthorEdge =
egoCoAuthorEdge =
new Collaboration(egoNode, coAuthorNode, biboDocument, edgeIDGenerator);
edges.add(egoCoAuthorEdge);
edgeUniqueIdentifierToVO.put(
getEdgeUniqueIdentifier(egoNode.getCollaboratorID(),
coAuthorNode.getCollaboratorID()),
coAuthorNode.getCollaboratorID()),
egoCoAuthorEdge);
}
}
/*
* This method takes out all the authors & edges between authors that belong to documents
* that have more than 100 authors. We conjecture that these papers do not provide much
* insight. However, we have left the documents be.
*
* This method side-effects "nodes" & "edges".
* */
removeLowQualityNodesAndEdges(nodes,
biboDocumentURLToVO,
biboDocumentURLToCoAuthors,
edges);
/*
* We need to create edges between 2 co-authors. E.g. On a paper there were 3 authors
* ego, A & B then we have already created edges like,
* ego - A
* ego - B
* The below sub-routine will take care of,
* A - B
*
* We are side-effecting "edges" here. The only reason to do this is because we are adding
* edges en masse for all the co-authors on all the publications considered so far. The
* other reason being we dont want to compare against 2 sets of edges (edges created before
* & co-author edges created during the course of this method) when we are creating a new
* Collaboration.
* */
createCoAuthorEdges(biboDocumentURLToVO,
biboDocumentURLToCoAuthors,
edges,
edgeUniqueIdentifierToVO);
return new CoAuthorshipData(egoNode, nodes, edges);
}
/**
 * Strips out the contribution of every document that has more than
 * MAX_AUTHORS_PER_PAPER_ALLOWED co-authors.
 *
 * For each such document, the document is removed from every edge and from
 * every co-author of that document. Edges whose activity set becomes empty
 * are removed from "edges"; co-authors whose activity set becomes empty are
 * removed from "nodes". The document maps themselves are left untouched.
 *
 * This method side-effects "nodes" & "edges".
 *
 * @param nodes                      collaborator set to prune
 * @param biboDocumentURLToVO        document URL to document lookup
 * @param biboDocumentURLToCoAuthors document URL to its co-authors
 * @param edges                      collaboration set to prune
 */
private void removeLowQualityNodesAndEdges(
        Set<Collaborator> nodes,
        Map<String, Activity> biboDocumentURLToVO,
        Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors,
        Set<Collaboration> edges) {

    Set<Collaborator> orphanedNodes = new HashSet<Collaborator>();

    for (Map.Entry<String, Set<Collaborator>> documentEntry
            : biboDocumentURLToCoAuthors.entrySet()) {

        Set<Collaborator> coAuthorsOfDocument = documentEntry.getValue();

        // Documents within the author limit are kept as they are.
        if (coAuthorsOfDocument.size() <= MAX_AUTHORS_PER_PAPER_ALLOWED) {
            continue;
        }

        Activity oversizedDocument = biboDocumentURLToVO.get(documentEntry.getKey());

        // Collect emptied edges first; they are removed after the iteration
        // so that "edges" is not modified while being traversed.
        Set<Collaboration> emptiedEdges = new HashSet<Collaboration>();
        for (Collaboration edge : edges) {
            Set<Activity> edgeDocuments = edge.getCollaborationActivities();
            if (edgeDocuments.remove(oversizedDocument) && edgeDocuments.isEmpty()) {
                emptiedEdges.add(edge);
            }
        }
        edges.removeAll(emptiedEdges);

        for (Collaborator coAuthor : coAuthorsOfDocument) {
            coAuthor.getCollaboratorActivities().remove(oversizedDocument);
            if (coAuthor.getCollaboratorActivities().isEmpty()) {
                orphanedNodes.add(coAuthor);
            }
        }
    }

    nodes.removeAll(orphanedNodes);
}
private void createCoAuthorEdges(
Map<String, Activity> biboDocumentURLToVO,
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors, Set<Collaboration> edges,
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
for (Map.Entry<String, Set<Collaborator>> currentBiboDocumentEntry
: biboDocumentURLToCoAuthors.entrySet()) {
/*
* If there was only one co-author (other than ego) then we dont have to create any
* edges. so the below condition will take care of that.
*
* We are restricting edges between co-author if a particular document has more than
* 100 co-authors. Our conjecture is that such edges do not provide any good insight
* & causes unnecessary computations causing the server to time-out.
* */
if (currentBiboDocumentEntry.getValue().size() > 1
&& currentBiboDocumentEntry.getValue().size()
<= MAX_AUTHORS_PER_PAPER_ALLOWED) {
Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
@Override
protected void endProcessing() {
/*
* In order to leverage the nested "for loop" for making edges between all the
* co-authors we need to create a list out of the set first.
* This method takes out all the authors & edges between authors that belong to documents
* that have more than 100 authors. We conjecture that these papers do not provide much
* insight. However, we have left the documents be.
*
* This method side-effects "nodes" & "edges".
* */
removeLowQualityNodesAndEdges(nodes,
biboDocumentURLToVO,
biboDocumentURLToCoAuthors,
edges);
/*
* We need to create edges between 2 co-authors. E.g. On a paper there were 3 authors
* ego, A & B then we have already created edges like,
* ego - A
* ego - B
* The below sub-routine will take care of,
* A - B
*
* We are side-effecting "edges" here. The only reason to do this is because we are adding
* edges en masse for all the co-authors on all the publications considered so far. The
* other reason being we dont want to compare against 2 sets of edges (edges created before
* & co-author edges created during the course of this method) when we are creating a new
* Collaboration.
* */
List<Collaborator> coAuthorNodes =
new ArrayList<Collaborator>(currentBiboDocumentEntry.getValue());
Collections.sort(coAuthorNodes, new CollaboratorComparator());
int numOfCoAuthors = coAuthorNodes.size();
for (int ii = 0; ii < numOfCoAuthors - 1; ii++) {
for (int jj = ii + 1; jj < numOfCoAuthors; jj++) {
Collaborator coAuthor1 = coAuthorNodes.get(ii);
Collaborator coAuthor2 = coAuthorNodes.get(jj);
Collaboration coAuthor1_2Edge = getExistingEdge(coAuthor1,
coAuthor2,
edgeUniqueIdentifierToVO);
Activity currentBiboDocument = biboDocumentURLToVO
.get(currentBiboDocumentEntry
.getKey());
if (coAuthor1_2Edge != null) {
coAuthor1_2Edge.addActivity(currentBiboDocument);
} else {
coAuthor1_2Edge = new Collaboration(coAuthor1,
coAuthor2,
currentBiboDocument,
edgeIDGenerator);
newlyAddedEdges.add(coAuthor1_2Edge);
edgeUniqueIdentifierToVO.put(
getEdgeUniqueIdentifier(coAuthor1.getCollaboratorID(),
coAuthor2.getCollaboratorID()),
coAuthor1_2Edge);
}
}
}
edges.addAll(newlyAddedEdges);
}
createCoAuthorEdges(biboDocumentURLToVO,
biboDocumentURLToCoAuthors,
edges,
edgeUniqueIdentifierToVO);
}
}
/**
 * Looks up the collaboration already registered between two collaborators.
 *
 * The lookup key is derived from the two collaborator IDs through
 * getEdgeUniqueIdentifier.
 *
 * @param collaboratingNode1       one end of the edge
 * @param collaboratingNode2       the other end of the edge
 * @param edgeUniqueIdentifierToVO edge key to collaboration lookup
 * @return whatever the map holds for the key (the result of Map.get)
 */
private Collaboration getExistingEdge(
        Collaborator collaboratingNode1,
        Collaborator collaboratingNode2,
        Map<String, Collaboration> edgeUniqueIdentifierToVO) {

    int firstID = collaboratingNode1.getCollaboratorID();
    int secondID = collaboratingNode2.getCollaboratorID();

    return edgeUniqueIdentifierToVO.get(getEdgeUniqueIdentifier(firstID, secondID));
}
private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
String separator = "*";
if (nodeID1 < nodeID2) {
return nodeID1 + separator + nodeID2;
} else {
return nodeID2 + separator + nodeID1;
/**
 * Packages the ego node, nodes and edges held by this consumer.
 *
 * A new CoAuthorshipData is constructed on every call.
 *
 * @return a CoAuthorshipData built from egoNode, nodes and edges
 */
public CollaborationData getCollaborationData() {
return new CoAuthorshipData(egoNode, nodes, edges);
}
}
private Activity createDocumentVO(QuerySolution solution, String documentURL) {
private Activity createDocumentVO(QuerySolution solution, String documentURL) {
Activity biboDocument = new Activity(documentURL);
@ -373,8 +230,145 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
}
return biboDocument;
}
/**
 * Builds the lookup key for the edge between two nodes.
 *
 * The smaller ID is always written first, so the key does not depend on
 * the order in which the two IDs are passed.
 *
 * @param nodeID1 ID of one end of the edge
 * @param nodeID2 ID of the other end of the edge
 * @return the two IDs joined by "*", smaller one first
 */
private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
    int lowerID = Math.min(nodeID1, nodeID2);
    int higherID = Math.max(nodeID1, nodeID2);
    return lowerID + "*" + higherID;
}
/**
 * Adds edges between the co-authors of each document.
 *
 * A document is skipped when it has at most one co-author (no pair to
 * connect) or more than MAX_AUTHORS_PER_PAPER_ALLOWED co-authors; per the
 * original authors, such edges give little insight and the extra
 * computation can make the server time out.
 *
 * For every remaining document each pair of co-authors is visited once.
 * If an edge for the pair is already registered the document is added to
 * it, otherwise a new Collaboration is created and registered.
 *
 * This method side-effects "edges" & "edgeUniqueIdentifierToVO".
 *
 * @param biboDocumentURLToVO        document URL to document lookup
 * @param biboDocumentURLToCoAuthors document URL to its co-authors
 * @param edges                      set receiving the newly created edges
 * @param edgeUniqueIdentifierToVO   edge key to collaboration lookup
 */
private void createCoAuthorEdges(
        Map<String, Activity> biboDocumentURLToVO,
        Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors, Set<Collaboration> edges,
        Map<String, Collaboration> edgeUniqueIdentifierToVO) {

    for (Map.Entry<String, Set<Collaborator>> documentEntry
            : biboDocumentURLToCoAuthors.entrySet()) {

        Set<Collaborator> coAuthorsOfDocument = documentEntry.getValue();
        int coAuthorCount = coAuthorsOfDocument.size();

        if (coAuthorCount <= 1 || coAuthorCount > MAX_AUTHORS_PER_PAPER_ALLOWED) {
            continue;
        }

        // The document is the same for every pair of this entry.
        Activity sharedDocument = biboDocumentURLToVO.get(documentEntry.getKey());

        // A list is needed so that pairs can be enumerated by index; it is
        // sorted with CollaboratorComparator before pairing.
        List<Collaborator> orderedCoAuthors =
                new ArrayList<Collaborator>(coAuthorsOfDocument);
        Collections.sort(orderedCoAuthors, new CollaboratorComparator());

        Set<Collaboration> createdEdges = new HashSet<Collaboration>();

        for (int first = 0; first < coAuthorCount - 1; first++) {
            Collaborator left = orderedCoAuthors.get(first);

            for (int second = first + 1; second < coAuthorCount; second++) {
                Collaborator right = orderedCoAuthors.get(second);

                Collaboration pairEdge =
                        getExistingEdge(left, right, edgeUniqueIdentifierToVO);

                if (pairEdge != null) {
                    pairEdge.addActivity(sharedDocument);
                    continue;
                }

                pairEdge = new Collaboration(left, right, sharedDocument, edgeIDGenerator);
                createdEdges.add(pairEdge);
                edgeUniqueIdentifierToVO.put(
                        getEdgeUniqueIdentifier(left.getCollaboratorID(),
                                                right.getCollaboratorID()),
                        pairEdge);
            }
        }

        edges.addAll(createdEdges);
    }
}
/**
 * Looks up the collaboration already registered between two collaborators.
 *
 * The lookup key is derived from the two collaborator IDs through
 * getEdgeUniqueIdentifier.
 *
 * @param collaboratingNode1       one end of the edge
 * @param collaboratingNode2       the other end of the edge
 * @param edgeUniqueIdentifierToVO edge key to collaboration lookup
 * @return whatever the map holds for the key (the result of Map.get)
 */
private Collaboration getExistingEdge(
        Collaborator collaboratingNode1,
        Collaborator collaboratingNode2,
        Map<String, Collaboration> edgeUniqueIdentifierToVO) {

    int firstID = collaboratingNode1.getCollaboratorID();
    int secondID = collaboratingNode2.getCollaboratorID();

    return edgeUniqueIdentifierToVO.get(getEdgeUniqueIdentifier(firstID, secondID));
}
/**
 * Strips out the contribution of every document that has more than
 * MAX_AUTHORS_PER_PAPER_ALLOWED co-authors.
 *
 * For each such document, the document is removed from every edge and from
 * every co-author of that document. Edges whose activity set becomes empty
 * are removed from "edges"; co-authors whose activity set becomes empty are
 * removed from "nodes". The document maps themselves are left untouched.
 *
 * This method side-effects "nodes" & "edges".
 *
 * @param nodes                      collaborator set to prune
 * @param biboDocumentURLToVO        document URL to document lookup
 * @param biboDocumentURLToCoAuthors document URL to its co-authors
 * @param edges                      collaboration set to prune
 */
private void removeLowQualityNodesAndEdges(
        Set<Collaborator> nodes,
        Map<String, Activity> biboDocumentURLToVO,
        Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors,
        Set<Collaboration> edges) {

    Set<Collaborator> orphanedNodes = new HashSet<Collaborator>();

    for (Map.Entry<String, Set<Collaborator>> documentEntry
            : biboDocumentURLToCoAuthors.entrySet()) {

        Set<Collaborator> coAuthorsOfDocument = documentEntry.getValue();

        // Documents within the author limit are kept as they are.
        if (coAuthorsOfDocument.size() <= MAX_AUTHORS_PER_PAPER_ALLOWED) {
            continue;
        }

        Activity oversizedDocument = biboDocumentURLToVO.get(documentEntry.getKey());

        // Collect emptied edges first; they are removed after the iteration
        // so that "edges" is not modified while being traversed.
        Set<Collaboration> emptiedEdges = new HashSet<Collaboration>();
        for (Collaboration edge : edges) {
            Set<Activity> edgeDocuments = edge.getCollaborationActivities();
            if (edgeDocuments.remove(oversizedDocument) && edgeDocuments.isEmpty()) {
                emptiedEdges.add(edge);
            }
        }
        edges.removeAll(emptiedEdges);

        for (Collaborator coAuthor : coAuthorsOfDocument) {
            coAuthor.getCollaboratorActivities().remove(oversizedDocument);
            if (coAuthor.getCollaboratorActivities().isEmpty()) {
                orphanedNodes.add(coAuthor);
            }
        }
    }

    nodes.removeAll(orphanedNodes);
}
/* END QUERY RUNNER */
}
private String generateEgoCoAuthorshipSparqlQuery(String queryURI) {
String sparqlQuery = QueryConstants.getSparqlPrefixQuery()
@ -387,7 +381,7 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
+ " (str(?publicationDate) as ?"
+ QueryFieldLabels.DOCUMENT_PUBLICATION_DATE + ") \n"
+ "WHERE { \n"
+ "<" + queryURI + "> rdf:type foaf:Person ;"
+ "<" + queryURI + "> rdf:type foaf:Person ;"
+ " rdfs:label ?authorLabel ;"
+ " core:relatedBy ?authorshipNode . \n"
+ "?authorshipNode rdf:type core:Authorship ;"
@ -404,7 +398,7 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
+ "ORDER BY ?document ?coAuthorPerson\n";
log.debug("COAUTHORSHIP QUERY - " + sparqlQuery);
return sparqlQuery;
}
@ -455,19 +449,13 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
throw new MalformedQueryParametersException("URI parameter is either null or empty.");
}
InputStream is = null;
ResultSet rs = null;
try {
is = rdfService.sparqlSelectQuery(generateEgoCoAuthorshipSparqlQuery(this.egoURI), RDFService.ResultFormat.JSON);
rs = ResultSetFactory.fromJSON(is);
data = createQueryResult(rs);
QueryResultConsumer consumer = new QueryResultConsumer();
rdfService.sparqlSelectQuery(generateEgoCoAuthorshipSparqlQuery(this.egoURI), consumer);
data = consumer.getCollaborationData();
} catch (RDFServiceException e) {
log.error("Unable to execute query", e);
throw new RuntimeException(e);
} finally {
if (is != null) {
try { is.close(); } catch (Throwable t) { }
}
}
CollaborationDataCacheEntry newEntry = new CollaborationDataCacheEntry();

View file

@ -2,7 +2,6 @@
package edu.cornell.mannlib.vitro.webapp.visualization.coprincipalinvestigator;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
@ -13,9 +12,9 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import com.hp.hpl.jena.query.ResultSetFactory;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.ResultSetConsumer;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -23,14 +22,8 @@ import org.apache.jena.iri.IRI;
import org.apache.jena.iri.IRIFactory;
import org.apache.jena.iri.Violation;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.RDFNode;
import edu.cornell.mannlib.vitro.webapp.visualization.collaborationutils.CoInvestigationData;
@ -60,12 +53,8 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
private Log log = LogFactory.getLog(CoPIGrantCountQueryRunner.class.getName());
private UniqueIDGenerator nodeIDGenerator;
private UniqueIDGenerator edgeIDGenerator;
private long before, after;
private static final String SPARQL_QUERY_COMMON_OPTIONAL_BLOCK_FOR_ROLE_DATE_TIME = ""
+ "OPTIONAL {"
+ " ?Role core:dateTimeInterval ?dateTimeIntervalValue . "
@ -95,9 +84,6 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
this.egoURI = egoURI;
this.rdfService = rdfService;
// this.log = log;
this.nodeIDGenerator = new UniqueIDGenerator();
this.edgeIDGenerator = new UniqueIDGenerator();
}
private String generateEgoCoPIquery(String queryURI) {
@ -294,19 +280,13 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
before = System.currentTimeMillis();
InputStream is = null;
ResultSet rs = null;
try {
is = rdfService.sparqlSelectQuery(generateEgoCoPIquery(this.egoURI), RDFService.ResultFormat.JSON);
rs = ResultSetFactory.fromJSON(is);
data = createQueryResult(rs);
QueryResultConsumer consumer = new QueryResultConsumer();
rdfService.sparqlSelectQuery(generateEgoCoPIquery(this.egoURI), consumer);
data = consumer.getData();
} catch (RDFServiceException e) {
log.error("Unable to execute query", e);
throw new RuntimeException(e);
} finally {
if (is != null) {
try { is.close(); } catch (Throwable t) { }
}
}
after = System.currentTimeMillis();
@ -356,34 +336,7 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
}
}
/**
 * Looks up the collaboration already registered between two collaborators.
 *
 * The lookup key is derived from the two collaborator IDs through
 * getEdgeUniqueIdentifier.
 *
 * @param collaboratingNode1       one end of the edge
 * @param collaboratingNode2       the other end of the edge
 * @param edgeUniqueIdentifierToVO edge key to collaboration lookup
 * @return whatever the map holds for the key (the result of Map.get)
 */
private Collaboration getExistingEdge(
        Collaborator collaboratingNode1,
        Collaborator collaboratingNode2,
        Map<String, Collaboration> edgeUniqueIdentifierToVO) {

    int firstID = collaboratingNode1.getCollaboratorID();
    int secondID = collaboratingNode2.getCollaboratorID();

    return edgeUniqueIdentifierToVO.get(getEdgeUniqueIdentifier(firstID, secondID));
}
/**
 * Builds the lookup key for the edge between two nodes.
 *
 * The smaller ID is always written first, so the key does not depend on
 * the order in which the two IDs are passed.
 *
 * @param nodeID1 ID of one end of the edge
 * @param nodeID2 ID of the other end of the edge
 * @return the two IDs joined by "*", smaller one first
 */
private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
    int lowerID = Math.min(nodeID1, nodeID2);
    int higherID = Math.max(nodeID1, nodeID2);
    return lowerID + "*" + higherID;
}
private CollaborationData createQueryResult(ResultSet resultSet) {
private static class QueryResultConsumer extends ResultSetConsumer {
Set<Collaborator> nodes = new HashSet<Collaborator>();
Map<String, Activity> grantURLToVO = new HashMap<String, Activity>();
@ -394,229 +347,233 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
Collaborator egoNode = null;
Set<Collaboration> edges = new HashSet<Collaboration>();
before = System.currentTimeMillis();
while (resultSet.hasNext()) {
QuerySolution solution = resultSet.nextSolution();
private UniqueIDGenerator nodeIDGenerator = new UniqueIDGenerator();
private UniqueIDGenerator edgeIDGenerator = new UniqueIDGenerator();
private Log log = LogFactory.getLog(CoPIGrantCountQueryRunner.class.getName());
@Override
protected void processQuerySolution(QuerySolution qs) {
/*
* We want to create only ONE ego node.
* */
RDFNode egoPIURLNode = solution.get(QueryFieldLabels.PI_URL);
if (nodeURLToVO.containsKey(egoPIURLNode.toString())) {
egoNode = nodeURLToVO.get(egoPIURLNode.toString());
} else {
egoNode = new Collaborator(egoPIURLNode.toString(), nodeIDGenerator);
nodes.add(egoNode);
nodeURLToVO.put(egoPIURLNode.toString(), egoNode);
RDFNode authorLabelNode = solution.get(QueryFieldLabels.PI_LABEL);
if (authorLabelNode != null) {
egoNode.setCollaboratorName(authorLabelNode.toString());
}
RDFNode egoPIURLNode = qs.get(QueryFieldLabels.PI_URL);
if (nodeURLToVO.containsKey(egoPIURLNode.toString())) {
egoNode = nodeURLToVO.get(egoPIURLNode.toString());
} else {
egoNode = new Collaborator(egoPIURLNode.toString(), nodeIDGenerator);
nodes.add(egoNode);
nodeURLToVO.put(egoPIURLNode.toString(), egoNode);
RDFNode authorLabelNode = qs.get(QueryFieldLabels.PI_LABEL);
if (authorLabelNode != null) {
egoNode.setCollaboratorName(authorLabelNode.toString());
}
log.debug("PI: " + egoNode.getIndividualLabel());
RDFNode grantNode = solution.get(QueryFieldLabels.GRANT_URL);
Activity grant;
if (grantURLToVO.containsKey(grantNode.toString())) {
grant = grantURLToVO.get(grantNode.toString());
} else {
grant = createGrantVO(solution, grantNode.toString());
grantURLToVO.put(grantNode.toString(), grant);
}
egoNode.addActivity(grant);
log.debug("Adding grant: " + grant.getIndividualLabel());
}
log.debug("PI: " + egoNode.getIndividualLabel());
RDFNode grantNode = qs.get(QueryFieldLabels.GRANT_URL);
Activity grant;
if (grantURLToVO.containsKey(grantNode.toString())) {
grant = grantURLToVO.get(grantNode.toString());
} else {
grant = createGrantVO(qs, grantNode.toString());
grantURLToVO.put(grantNode.toString(), grant);
}
egoNode.addActivity(grant);
log.debug("Adding grant: " + grant.getIndividualLabel());
/*
* After some discussion we concluded that for the purpose of this visualization
* we do not want a co-pi node or edge if the grant has only one
* pi and that happens to be the ego.
* */
if (solution.get(QueryFieldLabels.PI_URL).toString().equalsIgnoreCase(
solution.get(QueryFieldLabels.CO_PI_URL).toString())) {
continue;
if (qs.get(QueryFieldLabels.PI_URL).toString().equalsIgnoreCase(
qs.get(QueryFieldLabels.CO_PI_URL).toString())) {
return;
}
Collaborator coPINode;
RDFNode coPIURLNode = qs.get(QueryFieldLabels.CO_PI_URL);
if (nodeURLToVO.containsKey(coPIURLNode.toString())) {
coPINode = nodeURLToVO.get(coPIURLNode.toString());
} else {
coPINode = new Collaborator(coPIURLNode.toString(), nodeIDGenerator);
nodes.add(coPINode);
nodeURLToVO.put(coPIURLNode.toString(), coPINode);
RDFNode coPILabelNode = qs.get(QueryFieldLabels.CO_PI_LABEL);
if (coPILabelNode != null) {
coPINode.setCollaboratorName(coPILabelNode.toString());
}
Collaborator coPINode;
RDFNode coPIURLNode = solution.get(QueryFieldLabels.CO_PI_URL);
if (nodeURLToVO.containsKey(coPIURLNode.toString())) {
coPINode = nodeURLToVO.get(coPIURLNode.toString());
} else {
coPINode = new Collaborator(coPIURLNode.toString(), nodeIDGenerator);
nodes.add(coPINode);
nodeURLToVO.put(coPIURLNode.toString(), coPINode);
RDFNode coPILabelNode = solution.get(QueryFieldLabels.CO_PI_LABEL);
if (coPILabelNode != null) {
coPINode.setCollaboratorName(coPILabelNode.toString());
}
}
log.debug("Adding CO-PI: "+ coPINode.getIndividualLabel());
coPINode.addActivity(grant);
Set<Collaborator> coPIsForCurrentGrant;
if (grantURLToCoPIs.containsKey(grant.getActivityURI())) {
coPIsForCurrentGrant = grantURLToCoPIs.get(grant.getActivityURI());
} else {
coPIsForCurrentGrant = new HashSet<Collaborator>();
grantURLToCoPIs.put(grant.getActivityURI(),
coPIsForCurrentGrant);
}
coPIsForCurrentGrant.add(coPINode);
log.debug("Co-PI for current grant : " + coPINode.getIndividualLabel());
Collaboration egoCoPIEdge =
getExistingEdge(egoNode, coPINode, edgeUniqueIdentifierToVO);
}
log.debug("Adding CO-PI: "+ coPINode.getIndividualLabel());
coPINode.addActivity(grant);
Set<Collaborator> coPIsForCurrentGrant;
if (grantURLToCoPIs.containsKey(grant.getActivityURI())) {
coPIsForCurrentGrant = grantURLToCoPIs.get(grant.getActivityURI());
} else {
coPIsForCurrentGrant = new HashSet<Collaborator>();
grantURLToCoPIs.put(grant.getActivityURI(),
coPIsForCurrentGrant);
}
coPIsForCurrentGrant.add(coPINode);
log.debug("Co-PI for current grant : " + coPINode.getIndividualLabel());
Collaboration egoCoPIEdge =
getExistingEdge(egoNode, coPINode, edgeUniqueIdentifierToVO);
/*
* If "egoCoPIEdge" is null it means that no edge exists in between the egoNode
* & current coPINode. Else create a new edge, add it to the edges set & add
* If "egoCoPIEdge" is null it means that no edge exists in between the egoNode
* & current coPINode. Else create a new edge, add it to the edges set & add
* the collaborator grant to it.
* */
if (egoCoPIEdge != null) {
egoCoPIEdge.addActivity(grant);
} else {
egoCoPIEdge = new Collaboration(egoNode, coPINode, grant, edgeIDGenerator);
edges.add(egoCoPIEdge);
edgeUniqueIdentifierToVO.put(
getEdgeUniqueIdentifier(egoNode.getCollaboratorID(),
coPINode.getCollaboratorID()),
egoCoPIEdge);
}
if (egoCoPIEdge != null) {
egoCoPIEdge.addActivity(grant);
} else {
egoCoPIEdge = new Collaboration(egoNode, coPINode, grant, edgeIDGenerator);
edges.add(egoCoPIEdge);
edgeUniqueIdentifierToVO.put(
getEdgeUniqueIdentifier(egoNode.getCollaboratorID(),
coPINode.getCollaboratorID()),
egoCoPIEdge);
}
}
@Override
protected void endProcessing() {
super.endProcessing();
/*
* This method takes out all the PIs & edges between PIs that belong to grants
* that have more than 100 PIs. We conjecture that these grants do not provide much
* This method takes out all the PIs & edges between PIs that belong to grants
* that have more than 100 PIs. We conjecture that these grants do not provide much
* insight. However, we have left the grants be.
* This method side-effects "nodes" & "edges".
* This method side-effects "nodes" & "edges".
* */
removeLowQualityNodesAndEdges(nodes,
grantURLToVO,
grantURLToCoPIs,
edges);
removeLowQualityNodesAndEdges(nodes,
grantURLToVO,
grantURLToCoPIs,
edges);
/*
* We need to create edges between 2 co-PIs. E.g. On a grant there were 3 PI
* ego, A & B then we have already created edges like,
* ego - A
* ego - B
* The below sub-routine will take care of,
* A - B
*
* We are side-effecting "edges" here. The only reason to do this is because we are
* adding edges en masse for all the co-PIs on all the grants considered so far. The
* other reason being we dont want to compare against 2 sets of edges (edges created
* before & co-PI edges created during the course of this method) when we are creating
* A - B
*
* We are side-effecting "edges" here. The only reason to do this is because we are
* adding edges en masse for all the co-PIs on all the grants considered so far. The
* other reason being we dont want to compare against 2 sets of edges (edges created
* before & co-PI edges created during the course of this method) when we are creating
* a new edge.
* */
createCoPIEdges(grantURLToVO,
grantURLToCoPIs,
edges,
edgeUniqueIdentifierToVO);
after = System.currentTimeMillis();
log.debug("Time taken to iterate through the ResultSet of SELECT queries is in ms: "
+ (after - before));
return new CoInvestigationData(egoNode, nodes, edges);
}
createCoPIEdges(grantURLToVO,
grantURLToCoPIs,
edges,
edgeUniqueIdentifierToVO);
}
/**
 * Packages the ego node, nodes and edges held by this consumer.
 *
 * A new CoInvestigationData is constructed on every call.
 *
 * @return a CoInvestigationData built from egoNode, nodes and edges
 */
public CollaborationData getData() {
return new CoInvestigationData(egoNode, nodes, edges);
}
private void createCoPIEdges(Map<String, Activity> grantURLToVO,
Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges,
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
: grantURLToCoPIs.entrySet()) {
private void createCoPIEdges(Map<String, Activity> grantURLToVO,
Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges,
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
: grantURLToCoPIs.entrySet()) {
/*
* If there was only one co-PI (other than ego) then we dont have to create any
* If there was only one co-PI (other than ego) then we dont have to create any
* edges. so the below condition will take care of that.
*
*
* We are restricting edges between co-PI if a particular grant has more than
* 100 co-PIs. Our conjecture is that such edges do not provide any good insight
* & causes unnecessary computations causing the server to time-out.
* */
if (currentGrantEntry.getValue().size() > 1
&& currentGrantEntry.getValue().size()
if (currentGrantEntry.getValue().size() > 1
&& currentGrantEntry.getValue().size()
<= MAX_PI_PER_GRANT_ALLOWED) {
Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
/*
* In order to leverage the nested "for loop" for making edges between all the
* co-PIs we need to create a list out of the set first.
* In order to leverage the nested "for loop" for making edges between all the
* co-PIs we need to create a list out of the set first.
* */
List<Collaborator> coPINodes =
new ArrayList<Collaborator>(currentGrantEntry.getValue());
Collections.sort(coPINodes, new CollaboratorComparator());
int numOfCoPIs = coPINodes.size();
for (int ii = 0; ii < numOfCoPIs - 1; ii++) {
for (int jj = ii + 1; jj < numOfCoPIs; jj++) {
Collaborator coPI1 = coPINodes.get(ii);
Collaborator coPI2 = coPINodes.get(jj);
Collaboration coPI1_2Edge = getExistingEdge(coPI1,
coPI2,
edgeUniqueIdentifierToVO);
Activity currentGrant = grantURLToVO.get(currentGrantEntry.getKey());
if (coPI1_2Edge != null) {
coPI1_2Edge.addActivity(currentGrant);
} else {
coPI1_2Edge = new Collaboration(coPI1,
coPI2,
currentGrant,
edgeIDGenerator);
newlyAddedEdges.add(coPI1_2Edge);
edgeUniqueIdentifierToVO.put(
getEdgeUniqueIdentifier(coPI1.getCollaboratorID(),
coPI2.getCollaboratorID()),
coPI1_2Edge);
List<Collaborator> coPINodes =
new ArrayList<Collaborator>(currentGrantEntry.getValue());
Collections.sort(coPINodes, new CollaboratorComparator());
int numOfCoPIs = coPINodes.size();
for (int ii = 0; ii < numOfCoPIs - 1; ii++) {
for (int jj = ii + 1; jj < numOfCoPIs; jj++) {
Collaborator coPI1 = coPINodes.get(ii);
Collaborator coPI2 = coPINodes.get(jj);
Collaboration coPI1_2Edge = getExistingEdge(coPI1,
coPI2,
edgeUniqueIdentifierToVO);
Activity currentGrant = grantURLToVO.get(currentGrantEntry.getKey());
if (coPI1_2Edge != null) {
coPI1_2Edge.addActivity(currentGrant);
} else {
coPI1_2Edge = new Collaboration(coPI1,
coPI2,
currentGrant,
edgeIDGenerator);
newlyAddedEdges.add(coPI1_2Edge);
edgeUniqueIdentifierToVO.put(
getEdgeUniqueIdentifier(coPI1.getCollaboratorID(),
coPI2.getCollaboratorID()),
coPI1_2Edge);
}
}
}
edges.addAll(newlyAddedEdges);
}
}
edges.addAll(newlyAddedEdges);
}
}
}
private void removeLowQualityNodesAndEdges(Set<Collaborator> nodes,
Map<String, Activity> grantURLToVO,
Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges) {
Set<Collaborator> nodesToBeRemoved = new HashSet<Collaborator>();
for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
private void removeLowQualityNodesAndEdges(Set<Collaborator> nodes,
Map<String, Activity> grantURLToVO,
Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges) {
Set<Collaborator> nodesToBeRemoved = new HashSet<Collaborator>();
for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
: grantURLToCoPIs.entrySet()) {
if (currentGrantEntry.getValue().size() > MAX_PI_PER_GRANT_ALLOWED) {
Activity currentGrant = grantURLToVO.get(currentGrantEntry.getKey());
Set<Collaboration> edgesToBeRemoved = new HashSet<Collaboration>();
for (Collaboration currentEdge : edges) {
Set<Activity> currentCollaboratorGrants =
currentEdge.getCollaborationActivities();
Set<Activity> currentCollaboratorGrants =
currentEdge.getCollaborationActivities();
if (currentCollaboratorGrants.contains(currentGrant)) {
currentCollaboratorGrants.remove(currentGrant);
if (currentCollaboratorGrants.isEmpty()) {
@ -624,7 +581,7 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
}
}
}
edges.removeAll(edgesToBeRemoved);
for (Collaborator currentCoPI : currentGrantEntry.getValue()) {
@ -634,14 +591,14 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
}
}
}
}
nodes.removeAll(nodesToBeRemoved);
}
}
nodes.removeAll(nodesToBeRemoved);
private Activity createGrantVO(QuerySolution solution, String grantURL) {
Activity grant = new Activity(grantURL);
}
private Activity createGrantVO(QuerySolution solution, String grantURL) {
Activity grant = new Activity(grantURL);
// RDFNode grantLabelNode = solution.get(QueryFieldLabels.GRANT_LABEL);
// if (grantLabelNode != null) {
@ -649,17 +606,17 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
// }
RDFNode grantStartYear = solution.get(QueryFieldLabels.ROLE_START_DATE);
if (grantStartYear != null) {
grant.setActivityDate(grantStartYear.toString());
} else {
grantStartYear = solution.get(QueryFieldLabels.GRANT_START_DATE);
RDFNode grantStartYear = solution.get(QueryFieldLabels.ROLE_START_DATE);
if (grantStartYear != null) {
grant.setActivityDate(grantStartYear.toString());
}
}
//TODO: Verify that grant end date is not required.
} else {
grantStartYear = solution.get(QueryFieldLabels.GRANT_START_DATE);
if (grantStartYear != null) {
grant.setActivityDate(grantStartYear.toString());
}
}
//TODO: Verify that grant end date is not required.
/*
RDFNode grantEndDate = solution.get(QueryFieldLabels.ROLE_END_DATE);
if (grantEndDate != null) {
@ -668,10 +625,37 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
grantEndDate = solution.get(QueryFieldLabels.GRANT_END_DATE);
if(grantEndDate != null){
grant.setGrantEndDate(grantEndDate.toString());
}
}
}
*/
return grant;
return grant;
}
/**
 * Looks up the edge already recorded for a pair of collaborators.
 *
 * The lookup key is derived from the two collaborator IDs via
 * getEdgeUniqueIdentifier.
 *
 * @param collaboratingNode1 one end of the edge
 * @param collaboratingNode2 the other end of the edge
 * @param edgeUniqueIdentifierToVO map from edge identifier to edge
 * @return the value stored under the pair's identifier, or null when the
 *         map holds no mapping for it
 */
private Collaboration getExistingEdge(
        Collaborator collaboratingNode1,
        Collaborator collaboratingNode2,
        Map<String, Collaboration> edgeUniqueIdentifierToVO) {
    int firstNodeID = collaboratingNode1.getCollaboratorID();
    int secondNodeID = collaboratingNode2.getCollaboratorID();
    return edgeUniqueIdentifierToVO.get(
            getEdgeUniqueIdentifier(firstNodeID, secondNodeID));
}
/**
 * Builds the lookup key for a pair of node IDs.
 *
 * The smaller ID is written first, then "*", then the larger ID, so the
 * same key is produced regardless of the order the IDs are passed in.
 *
 * @param nodeID1 ID of one node
 * @param nodeID2 ID of the other node
 * @return the key in the form {@code <smaller>*<larger>}
 */
private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
    int lowerID = Math.min(nodeID1, nodeID2);
    int higherID = Math.max(nodeID1, nodeID2);
    return lowerID + "*" + higherID;
}
/** END QUERY RESULT CONSUMER **/
}
}