From 8fc40fcf987488f60d851f0eab81dbb455f5c602 Mon Sep 17 00:00:00 2001 From: grahamtriggs Date: Mon, 19 Oct 2015 15:51:57 +0100 Subject: [PATCH] [VIVO-1031] Use streaming result set for co- visualisations --- .../coauthorship/CoAuthorshipQueryRunner.java | 456 ++++++++-------- .../CoPIGrantCountQueryRunner.java | 486 +++++++++--------- 2 files changed, 457 insertions(+), 485 deletions(-) diff --git a/src/edu/cornell/mannlib/vitro/webapp/visualization/coauthorship/CoAuthorshipQueryRunner.java b/src/edu/cornell/mannlib/vitro/webapp/visualization/coauthorship/CoAuthorshipQueryRunner.java index 15456fd0..eb623ce2 100644 --- a/src/edu/cornell/mannlib/vitro/webapp/visualization/coauthorship/CoAuthorshipQueryRunner.java +++ b/src/edu/cornell/mannlib/vitro/webapp/visualization/coauthorship/CoAuthorshipQueryRunner.java @@ -16,6 +16,8 @@ import java.util.concurrent.ConcurrentHashMap; import com.hp.hpl.jena.query.ResultSetFactory; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; +import edu.cornell.mannlib.vitro.webapp.rdfservice.ResultSetConsumer; +import net.sf.jga.algorithms.Unique; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.jena.iri.IRI; @@ -23,7 +25,6 @@ import org.apache.jena.iri.IRIFactory; import org.apache.jena.iri.Violation; import com.hp.hpl.jena.query.QuerySolution; -import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.Syntax; import com.hp.hpl.jena.rdf.model.RDFNode; @@ -58,312 +59,168 @@ public class CoAuthorshipQueryRunner implements QueryRunner { private Log log; - private UniqueIDGenerator nodeIDGenerator; - - private UniqueIDGenerator edgeIDGenerator; - public CoAuthorshipQueryRunner(String egoURI, RDFService rdfService, Log log) { this.egoURI = egoURI; this.rdfService = rdfService; this.log = log; - - this.nodeIDGenerator = new UniqueIDGenerator(); - this.edgeIDGenerator = new UniqueIDGenerator(); } - private CollaborationData createQueryResult(ResultSet resultSet) { - + private static class QueryResultConsumer extends ResultSetConsumer { Set nodes = new HashSet(); Map biboDocumentURLToVO = new HashMap(); - Map> biboDocumentURLToCoAuthors = - new HashMap>(); + Map> biboDocumentURLToCoAuthors = new HashMap>(); Map nodeURLToVO = new HashMap(); Map edgeUniqueIdentifierToVO = new HashMap(); Collaborator egoNode = null; Set edges = new HashSet(); - - while (resultSet.hasNext()) { - QuerySolution solution = resultSet.nextSolution(); - + + private UniqueIDGenerator nodeIDGenerator = new UniqueIDGenerator(); + private UniqueIDGenerator edgeIDGenerator = new UniqueIDGenerator(); + + @Override + protected void processQuerySolution(QuerySolution qs) { /* * We only want to create only ONE ego node. * */ - RDFNode egoAuthorURLNode = solution.get(QueryFieldLabels.AUTHOR_URL); + RDFNode egoAuthorURLNode = qs.get(QueryFieldLabels.AUTHOR_URL); if (nodeURLToVO.containsKey(egoAuthorURLNode.toString())) { egoNode = nodeURLToVO.get(egoAuthorURLNode.toString()); - + } else { - + egoNode = new Collaborator(egoAuthorURLNode.toString(), nodeIDGenerator); nodes.add(egoNode); nodeURLToVO.put(egoAuthorURLNode.toString(), egoNode); - - RDFNode authorLabelNode = solution.get(QueryFieldLabels.AUTHOR_LABEL); + + RDFNode authorLabelNode = qs.get(QueryFieldLabels.AUTHOR_LABEL); if (authorLabelNode != null) { egoNode.setCollaboratorName(authorLabelNode.toString()); } } - - RDFNode documentNode = solution.get(QueryFieldLabels.DOCUMENT_URL); + + RDFNode documentNode = qs.get(QueryFieldLabels.DOCUMENT_URL); Activity biboDocument; - + if (biboDocumentURLToVO.containsKey(documentNode.toString())) { biboDocument = biboDocumentURLToVO.get(documentNode.toString()); } else { - biboDocument = createDocumentVO(solution, documentNode.toString()); - biboDocumentURLToVO.put(documentNode.toString(), biboDocument); + biboDocument = createDocumentVO(qs, documentNode.toString()); + biboDocumentURLToVO.put(documentNode.toString(), biboDocument); } - + egoNode.addActivity(biboDocument); - + /* * After some discussion we concluded that for the purpose of this visualization * we do not want a co-author node or Collaboration if the publication has only one * author and that happens to be the ego. * */ - if (solution.get(QueryFieldLabels.AUTHOR_URL).toString().equalsIgnoreCase( - solution.get(QueryFieldLabels.CO_AUTHOR_URL).toString())) { - continue; + if (qs.get(QueryFieldLabels.AUTHOR_URL).toString().equalsIgnoreCase( + qs.get(QueryFieldLabels.CO_AUTHOR_URL).toString())) { + return; } - + Collaborator coAuthorNode; - - RDFNode coAuthorURLNode = solution.get(QueryFieldLabels.CO_AUTHOR_URL); + + RDFNode coAuthorURLNode = qs.get(QueryFieldLabels.CO_AUTHOR_URL); if (nodeURLToVO.containsKey(coAuthorURLNode.toString())) { coAuthorNode = nodeURLToVO.get(coAuthorURLNode.toString()); - + } else { - + coAuthorNode = new Collaborator(coAuthorURLNode.toString(), nodeIDGenerator); nodes.add(coAuthorNode); nodeURLToVO.put(coAuthorURLNode.toString(), coAuthorNode); - - RDFNode coAuthorLabelNode = solution.get(QueryFieldLabels.CO_AUTHOR_LABEL); + + RDFNode coAuthorLabelNode = qs.get(QueryFieldLabels.CO_AUTHOR_LABEL); if (coAuthorLabelNode != null) { coAuthorNode.setCollaboratorName(coAuthorLabelNode.toString()); } } - + coAuthorNode.addActivity(biboDocument); - + Set coAuthorsForCurrentBiboDocument; - + if (biboDocumentURLToCoAuthors.containsKey(biboDocument.getActivityURI())) { coAuthorsForCurrentBiboDocument = biboDocumentURLToCoAuthors - .get(biboDocument.getActivityURI()); + .get(biboDocument.getActivityURI()); } else { coAuthorsForCurrentBiboDocument = new HashSet(); - biboDocumentURLToCoAuthors.put(biboDocument.getActivityURI(), - coAuthorsForCurrentBiboDocument); + biboDocumentURLToCoAuthors.put(biboDocument.getActivityURI(), + coAuthorsForCurrentBiboDocument); } - + coAuthorsForCurrentBiboDocument.add(coAuthorNode); - - Collaboration egoCoAuthorEdge = + + Collaboration egoCoAuthorEdge = getExistingEdge(egoNode, coAuthorNode, edgeUniqueIdentifierToVO); - + /* - * If "egoCoAuthorEdge" is null it means that no Collaboration exists in between the - * egoNode & current coAuthorNode. Else create a new Collaboration, add it to the edges + * If "egoCoAuthorEdge" is null it means that no Collaboration exists in between the + * egoNode & current coAuthorNode. Else create a new Collaboration, add it to the edges * set & add the collaborator document to it. * */ if (egoCoAuthorEdge != null) { egoCoAuthorEdge.addActivity(biboDocument); } else { - egoCoAuthorEdge = + egoCoAuthorEdge = new Collaboration(egoNode, coAuthorNode, biboDocument, edgeIDGenerator); edges.add(egoCoAuthorEdge); edgeUniqueIdentifierToVO.put( getEdgeUniqueIdentifier(egoNode.getCollaboratorID(), - coAuthorNode.getCollaboratorID()), + coAuthorNode.getCollaboratorID()), egoCoAuthorEdge); } - - } - - - - /* - * This method takes out all the authors & edges between authors that belong to documents - * that have more than 100 authors. We conjecture that these papers do not provide much - * insight. However, we have left the documents be. - * - * This method side-effects "nodes" & "edges". - * */ - removeLowQualityNodesAndEdges(nodes, - biboDocumentURLToVO, - biboDocumentURLToCoAuthors, - edges); - - /* - * We need to create edges between 2 co-authors. E.g. On a paper there were 3 authors - * ego, A & B then we have already created edges like, - * ego - A - * ego - B - * The below sub-routine will take care of, - * A - B - * - * We are side-effecting "edges" here. The only reason to do this is because we are adding - * edges en masse for all the co-authors on all the publications considered so far. The - * other reason being we dont want to compare against 2 sets of edges (edges created before - * & co-author edges created during the course of this method) when we are creating a new - * Collaboration. - * */ - createCoAuthorEdges(biboDocumentURLToVO, - biboDocumentURLToCoAuthors, - edges, - edgeUniqueIdentifierToVO); - - - return new CoAuthorshipData(egoNode, nodes, edges); - } - private void removeLowQualityNodesAndEdges( - Set nodes, - Map biboDocumentURLToVO, - Map> biboDocumentURLToCoAuthors, - Set edges) { - - Set nodesToBeRemoved = new HashSet(); - for (Map.Entry> currentBiboDocumentEntry - : biboDocumentURLToCoAuthors.entrySet()) { - - if (currentBiboDocumentEntry.getValue().size() > MAX_AUTHORS_PER_PAPER_ALLOWED) { - - Activity currentBiboDocument = biboDocumentURLToVO - .get(currentBiboDocumentEntry.getKey()); - - Set edgesToBeRemoved = new HashSet(); - - for (Collaboration currentEdge : edges) { - Set currentCollaboratorDocuments = - currentEdge.getCollaborationActivities(); - - if (currentCollaboratorDocuments.contains(currentBiboDocument)) { - currentCollaboratorDocuments.remove(currentBiboDocument); - if (currentCollaboratorDocuments.isEmpty()) { - edgesToBeRemoved.add(currentEdge); - } - } - } - - edges.removeAll(edgesToBeRemoved); - - for (Collaborator currentCoAuthor : currentBiboDocumentEntry.getValue()) { - currentCoAuthor.getCollaboratorActivities().remove(currentBiboDocument); - if (currentCoAuthor.getCollaboratorActivities().isEmpty()) { - nodesToBeRemoved.add(currentCoAuthor); - } - } - } - } - nodes.removeAll(nodesToBeRemoved); - } - - private void createCoAuthorEdges( - Map biboDocumentURLToVO, - Map> biboDocumentURLToCoAuthors, Set edges, - Map edgeUniqueIdentifierToVO) { - - for (Map.Entry> currentBiboDocumentEntry - : biboDocumentURLToCoAuthors.entrySet()) { - - /* - * If there was only one co-author (other than ego) then we dont have to create any - * edges. so the below condition will take care of that. - * - * We are restricting edges between co-author if a particular document has more than - * 100 co-authors. Our conjecture is that such edges do not provide any good insight - * & causes unnecessary computations causing the server to time-out. - * */ - if (currentBiboDocumentEntry.getValue().size() > 1 - && currentBiboDocumentEntry.getValue().size() - <= MAX_AUTHORS_PER_PAPER_ALLOWED) { - - - Set newlyAddedEdges = new HashSet(); - + @Override + protected void endProcessing() { /* - * In order to leverage the nested "for loop" for making edges between all the - * co-authors we need to create a list out of the set first. + * This method takes out all the authors & edges between authors that belong to documents + * that have more than 100 authors. We conjecture that these papers do not provide much + * insight. However, we have left the documents be. + * + * This method side-effects "nodes" & "edges". + * */ + removeLowQualityNodesAndEdges(nodes, + biboDocumentURLToVO, + biboDocumentURLToCoAuthors, + edges); + + /* + * We need to create edges between 2 co-authors. E.g. On a paper there were 3 authors + * ego, A & B then we have already created edges like, + * ego - A + * ego - B + * The below sub-routine will take care of, + * A - B + * + * We are side-effecting "edges" here. The only reason to do this is because we are adding + * edges en masse for all the co-authors on all the publications considered so far. The + * other reason being we dont want to compare against 2 sets of edges (edges created before + * & co-author edges created during the course of this method) when we are creating a new + * Collaboration. * */ - List coAuthorNodes = - new ArrayList(currentBiboDocumentEntry.getValue()); - - Collections.sort(coAuthorNodes, new CollaboratorComparator()); - - int numOfCoAuthors = coAuthorNodes.size(); - - for (int ii = 0; ii < numOfCoAuthors - 1; ii++) { - for (int jj = ii + 1; jj < numOfCoAuthors; jj++) { - - Collaborator coAuthor1 = coAuthorNodes.get(ii); - Collaborator coAuthor2 = coAuthorNodes.get(jj); - - Collaboration coAuthor1_2Edge = getExistingEdge(coAuthor1, - coAuthor2, - edgeUniqueIdentifierToVO); - - Activity currentBiboDocument = biboDocumentURLToVO - .get(currentBiboDocumentEntry - .getKey()); - - if (coAuthor1_2Edge != null) { - coAuthor1_2Edge.addActivity(currentBiboDocument); - } else { - coAuthor1_2Edge = new Collaboration(coAuthor1, - coAuthor2, - currentBiboDocument, - edgeIDGenerator); - newlyAddedEdges.add(coAuthor1_2Edge); - edgeUniqueIdentifierToVO.put( - getEdgeUniqueIdentifier(coAuthor1.getCollaboratorID(), - coAuthor2.getCollaboratorID()), - coAuthor1_2Edge); - } - } - } - edges.addAll(newlyAddedEdges); - } - + createCoAuthorEdges(biboDocumentURLToVO, + biboDocumentURLToCoAuthors, + edges, + edgeUniqueIdentifierToVO); } - } - private Collaboration getExistingEdge( - Collaborator collaboratingNode1, - Collaborator collaboratingNode2, - Map edgeUniqueIdentifierToVO) { - - String edgeUniqueIdentifier = getEdgeUniqueIdentifier( - collaboratingNode1.getCollaboratorID(), - collaboratingNode2.getCollaboratorID()); - - return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier); - - } - - private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) { - - String separator = "*"; - - if (nodeID1 < nodeID2) { - return nodeID1 + separator + nodeID2; - } else { - return nodeID2 + separator + nodeID1; + public CollaborationData getCollaborationData() { + return new CoAuthorshipData(egoNode, nodes, edges); } - - } - private Activity createDocumentVO(QuerySolution solution, String documentURL) { + private Activity createDocumentVO(QuerySolution solution, String documentURL) { Activity biboDocument = new Activity(documentURL); @@ -373,8 +230,145 @@ public class CoAuthorshipQueryRunner implements QueryRunner { } return biboDocument; + } + + private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) { + String separator = "*"; + + if (nodeID1 < nodeID2) { + return nodeID1 + separator + nodeID2; + } else { + return nodeID2 + separator + nodeID1; + } + + } + + private void createCoAuthorEdges( + Map biboDocumentURLToVO, + Map> biboDocumentURLToCoAuthors, Set edges, + Map edgeUniqueIdentifierToVO) { + + for (Map.Entry> currentBiboDocumentEntry + : biboDocumentURLToCoAuthors.entrySet()) { + + /* + * If there was only one co-author (other than ego) then we dont have to create any + * edges. so the below condition will take care of that. + * + * We are restricting edges between co-author if a particular document has more than + * 100 co-authors. Our conjecture is that such edges do not provide any good insight + * & causes unnecessary computations causing the server to time-out. + * */ + if (currentBiboDocumentEntry.getValue().size() > 1 + && currentBiboDocumentEntry.getValue().size() + <= MAX_AUTHORS_PER_PAPER_ALLOWED) { + + + Set newlyAddedEdges = new HashSet(); + + /* + * In order to leverage the nested "for loop" for making edges between all the + * co-authors we need to create a list out of the set first. + * */ + List coAuthorNodes = + new ArrayList(currentBiboDocumentEntry.getValue()); + + Collections.sort(coAuthorNodes, new CollaboratorComparator()); + + int numOfCoAuthors = coAuthorNodes.size(); + + for (int ii = 0; ii < numOfCoAuthors - 1; ii++) { + for (int jj = ii + 1; jj < numOfCoAuthors; jj++) { + + Collaborator coAuthor1 = coAuthorNodes.get(ii); + Collaborator coAuthor2 = coAuthorNodes.get(jj); + + Collaboration coAuthor1_2Edge = getExistingEdge(coAuthor1, + coAuthor2, + edgeUniqueIdentifierToVO); + + Activity currentBiboDocument = biboDocumentURLToVO + .get(currentBiboDocumentEntry + .getKey()); + + if (coAuthor1_2Edge != null) { + coAuthor1_2Edge.addActivity(currentBiboDocument); + } else { + coAuthor1_2Edge = new Collaboration(coAuthor1, + coAuthor2, + currentBiboDocument, + edgeIDGenerator); + newlyAddedEdges.add(coAuthor1_2Edge); + edgeUniqueIdentifierToVO.put( + getEdgeUniqueIdentifier(coAuthor1.getCollaboratorID(), + coAuthor2.getCollaboratorID()), + coAuthor1_2Edge); + } + } + } + edges.addAll(newlyAddedEdges); + } + } + } + + private Collaboration getExistingEdge( + Collaborator collaboratingNode1, + Collaborator collaboratingNode2, + Map edgeUniqueIdentifierToVO) { + + String edgeUniqueIdentifier = getEdgeUniqueIdentifier( + collaboratingNode1.getCollaboratorID(), + collaboratingNode2.getCollaboratorID()); + + return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier); + + } + + private void removeLowQualityNodesAndEdges( + Set nodes, + Map biboDocumentURLToVO, + Map> biboDocumentURLToCoAuthors, + Set edges) { + + Set nodesToBeRemoved = new HashSet(); + for (Map.Entry> currentBiboDocumentEntry + : biboDocumentURLToCoAuthors.entrySet()) { + + if (currentBiboDocumentEntry.getValue().size() > MAX_AUTHORS_PER_PAPER_ALLOWED) { + + Activity currentBiboDocument = biboDocumentURLToVO + .get(currentBiboDocumentEntry.getKey()); + + Set edgesToBeRemoved = new HashSet(); + + for (Collaboration currentEdge : edges) { + Set currentCollaboratorDocuments = + currentEdge.getCollaborationActivities(); + + if (currentCollaboratorDocuments.contains(currentBiboDocument)) { + currentCollaboratorDocuments.remove(currentBiboDocument); + if (currentCollaboratorDocuments.isEmpty()) { + edgesToBeRemoved.add(currentEdge); + } + } + } + + edges.removeAll(edgesToBeRemoved); + + for (Collaborator currentCoAuthor : currentBiboDocumentEntry.getValue()) { + currentCoAuthor.getCollaboratorActivities().remove(currentBiboDocument); + if (currentCoAuthor.getCollaboratorActivities().isEmpty()) { + nodesToBeRemoved.add(currentCoAuthor); + } + } + } + } + nodes.removeAll(nodesToBeRemoved); + } + + /* END QUERY RUNNER */ } - + private String generateEgoCoAuthorshipSparqlQuery(String queryURI) { String sparqlQuery = QueryConstants.getSparqlPrefixQuery() @@ -387,7 +381,7 @@ public class CoAuthorshipQueryRunner implements QueryRunner { + " (str(?publicationDate) as ?" + QueryFieldLabels.DOCUMENT_PUBLICATION_DATE + ") \n" + "WHERE { \n" - + "<" + queryURI + "> rdf:type foaf:Person ;" + + "<" + queryURI + "> rdf:type foaf:Person ;" + " rdfs:label ?authorLabel ;" + " core:relatedBy ?authorshipNode . \n" + "?authorshipNode rdf:type core:Authorship ;" @@ -404,7 +398,7 @@ public class CoAuthorshipQueryRunner implements QueryRunner { + "ORDER BY ?document ?coAuthorPerson\n"; log.debug("COAUTHORSHIP QUERY - " + sparqlQuery); - + return sparqlQuery; } @@ -455,19 +449,13 @@ public class CoAuthorshipQueryRunner implements QueryRunner { throw new MalformedQueryParametersException("URI parameter is either null or empty."); } - InputStream is = null; - ResultSet rs = null; try { - is = rdfService.sparqlSelectQuery(generateEgoCoAuthorshipSparqlQuery(this.egoURI), RDFService.ResultFormat.JSON); - rs = ResultSetFactory.fromJSON(is); - data = createQueryResult(rs); + QueryResultConsumer consumer = new QueryResultConsumer(); + rdfService.sparqlSelectQuery(generateEgoCoAuthorshipSparqlQuery(this.egoURI), consumer); + data = consumer.getCollaborationData(); } catch (RDFServiceException e) { log.error("Unable to execute query", e); throw new RuntimeException(e); - } finally { - if (is != null) { - try { is.close(); } catch (Throwable t) { } - } } CollaborationDataCacheEntry newEntry = new CollaborationDataCacheEntry(); diff --git a/src/edu/cornell/mannlib/vitro/webapp/visualization/coprincipalinvestigator/CoPIGrantCountQueryRunner.java b/src/edu/cornell/mannlib/vitro/webapp/visualization/coprincipalinvestigator/CoPIGrantCountQueryRunner.java index 4f4262d6..8022a308 100644 --- a/src/edu/cornell/mannlib/vitro/webapp/visualization/coprincipalinvestigator/CoPIGrantCountQueryRunner.java +++ b/src/edu/cornell/mannlib/vitro/webapp/visualization/coprincipalinvestigator/CoPIGrantCountQueryRunner.java @@ -2,7 +2,6 @@ package edu.cornell.mannlib.vitro.webapp.visualization.coprincipalinvestigator; -import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; import java.util.Date; @@ -13,9 +12,9 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import com.hp.hpl.jena.query.ResultSetFactory; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; +import edu.cornell.mannlib.vitro.webapp.rdfservice.ResultSetConsumer; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -23,14 +22,8 @@ import org.apache.jena.iri.IRI; import org.apache.jena.iri.IRIFactory; import org.apache.jena.iri.Violation; -import com.hp.hpl.jena.query.Query; -import com.hp.hpl.jena.query.QueryExecution; -import com.hp.hpl.jena.query.QueryExecutionFactory; -import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; -import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.Syntax; -import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.RDFNode; import edu.cornell.mannlib.vitro.webapp.visualization.collaborationutils.CoInvestigationData; @@ -60,12 +53,8 @@ public class CoPIGrantCountQueryRunner implements QueryRunner private Log log = LogFactory.getLog(CoPIGrantCountQueryRunner.class.getName()); - private UniqueIDGenerator nodeIDGenerator; - - private UniqueIDGenerator edgeIDGenerator; - private long before, after; - + private static final String SPARQL_QUERY_COMMON_OPTIONAL_BLOCK_FOR_ROLE_DATE_TIME = "" + "OPTIONAL {" + " ?Role core:dateTimeInterval ?dateTimeIntervalValue . " @@ -95,9 +84,6 @@ public class CoPIGrantCountQueryRunner implements QueryRunner this.egoURI = egoURI; this.rdfService = rdfService; // this.log = log; - - this.nodeIDGenerator = new UniqueIDGenerator(); - this.edgeIDGenerator = new UniqueIDGenerator(); } private String generateEgoCoPIquery(String queryURI) { @@ -294,19 +280,13 @@ public class CoPIGrantCountQueryRunner implements QueryRunner before = System.currentTimeMillis(); - InputStream is = null; - ResultSet rs = null; try { - is = rdfService.sparqlSelectQuery(generateEgoCoPIquery(this.egoURI), RDFService.ResultFormat.JSON); - rs = ResultSetFactory.fromJSON(is); - data = createQueryResult(rs); + QueryResultConsumer consumer = new QueryResultConsumer(); + rdfService.sparqlSelectQuery(generateEgoCoPIquery(this.egoURI), consumer); + data = consumer.getData(); } catch (RDFServiceException e) { log.error("Unable to execute query", e); throw new RuntimeException(e); - } finally { - if (is != null) { - try { is.close(); } catch (Throwable t) { } - } } after = System.currentTimeMillis(); @@ -356,34 +336,7 @@ public class CoPIGrantCountQueryRunner implements QueryRunner } } - - private Collaboration getExistingEdge( - Collaborator collaboratingNode1, - Collaborator collaboratingNode2, - Map edgeUniqueIdentifierToVO) { - - String edgeUniqueIdentifier = getEdgeUniqueIdentifier( - collaboratingNode1.getCollaboratorID(), - collaboratingNode2.getCollaboratorID()); - - return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier); - - } - - private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) { - - String separator = "*"; - - if (nodeID1 < nodeID2) { - return nodeID1 + separator + nodeID2; - } else { - return nodeID2 + separator + nodeID1; - } - - } - - private CollaborationData createQueryResult(ResultSet resultSet) { - + private static class QueryResultConsumer extends ResultSetConsumer { Set nodes = new HashSet(); Map grantURLToVO = new HashMap(); @@ -394,229 +347,233 @@ public class CoPIGrantCountQueryRunner implements QueryRunner Collaborator egoNode = null; Set edges = new HashSet(); - - before = System.currentTimeMillis(); - - while (resultSet.hasNext()) { - QuerySolution solution = resultSet.nextSolution(); - + + private UniqueIDGenerator nodeIDGenerator = new UniqueIDGenerator(); + private UniqueIDGenerator edgeIDGenerator = new UniqueIDGenerator(); + + private Log log = LogFactory.getLog(CoPIGrantCountQueryRunner.class.getName()); + + @Override + protected void processQuerySolution(QuerySolution qs) { /* * We only want to create only ONE ego node. * */ - RDFNode egoPIURLNode = solution.get(QueryFieldLabels.PI_URL); - if (nodeURLToVO.containsKey(egoPIURLNode.toString())) { - - egoNode = nodeURLToVO.get(egoPIURLNode.toString()); - - } else { - - egoNode = new Collaborator(egoPIURLNode.toString(), nodeIDGenerator); - nodes.add(egoNode); - nodeURLToVO.put(egoPIURLNode.toString(), egoNode); - - - RDFNode authorLabelNode = solution.get(QueryFieldLabels.PI_LABEL); - if (authorLabelNode != null) { - egoNode.setCollaboratorName(authorLabelNode.toString()); - } + RDFNode egoPIURLNode = qs.get(QueryFieldLabels.PI_URL); + if (nodeURLToVO.containsKey(egoPIURLNode.toString())) { + + egoNode = nodeURLToVO.get(egoPIURLNode.toString()); + + } else { + + egoNode = new Collaborator(egoPIURLNode.toString(), nodeIDGenerator); + nodes.add(egoNode); + nodeURLToVO.put(egoPIURLNode.toString(), egoNode); + + + RDFNode authorLabelNode = qs.get(QueryFieldLabels.PI_LABEL); + if (authorLabelNode != null) { + egoNode.setCollaboratorName(authorLabelNode.toString()); } - log.debug("PI: " + egoNode.getIndividualLabel()); - - RDFNode grantNode = solution.get(QueryFieldLabels.GRANT_URL); - Activity grant; - - if (grantURLToVO.containsKey(grantNode.toString())) { - grant = grantURLToVO.get(grantNode.toString()); - } else { - grant = createGrantVO(solution, grantNode.toString()); - grantURLToVO.put(grantNode.toString(), grant); - } - - egoNode.addActivity(grant); - log.debug("Adding grant: " + grant.getIndividualLabel()); - + } + log.debug("PI: " + egoNode.getIndividualLabel()); + + RDFNode grantNode = qs.get(QueryFieldLabels.GRANT_URL); + Activity grant; + + if (grantURLToVO.containsKey(grantNode.toString())) { + grant = grantURLToVO.get(grantNode.toString()); + } else { + grant = createGrantVO(qs, grantNode.toString()); + grantURLToVO.put(grantNode.toString(), grant); + } + + egoNode.addActivity(grant); + log.debug("Adding grant: " + grant.getIndividualLabel()); + /* * After some discussion we concluded that for the purpose of this visualization * we do not want a co-pi node or edge if the grant has only one * pi and that happens to be the ego. * */ - if (solution.get(QueryFieldLabels.PI_URL).toString().equalsIgnoreCase( - solution.get(QueryFieldLabels.CO_PI_URL).toString())) { - continue; + if (qs.get(QueryFieldLabels.PI_URL).toString().equalsIgnoreCase( + qs.get(QueryFieldLabels.CO_PI_URL).toString())) { + return; + } + + Collaborator coPINode; + + RDFNode coPIURLNode = qs.get(QueryFieldLabels.CO_PI_URL); + if (nodeURLToVO.containsKey(coPIURLNode.toString())) { + + coPINode = nodeURLToVO.get(coPIURLNode.toString()); + + } else { + + coPINode = new Collaborator(coPIURLNode.toString(), nodeIDGenerator); + nodes.add(coPINode); + nodeURLToVO.put(coPIURLNode.toString(), coPINode); + + RDFNode coPILabelNode = qs.get(QueryFieldLabels.CO_PI_LABEL); + if (coPILabelNode != null) { + coPINode.setCollaboratorName(coPILabelNode.toString()); } - - Collaborator coPINode; - - RDFNode coPIURLNode = solution.get(QueryFieldLabels.CO_PI_URL); - if (nodeURLToVO.containsKey(coPIURLNode.toString())) { - - coPINode = nodeURLToVO.get(coPIURLNode.toString()); - - } else { - - coPINode = new Collaborator(coPIURLNode.toString(), nodeIDGenerator); - nodes.add(coPINode); - nodeURLToVO.put(coPIURLNode.toString(), coPINode); - - RDFNode coPILabelNode = solution.get(QueryFieldLabels.CO_PI_LABEL); - if (coPILabelNode != null) { - coPINode.setCollaboratorName(coPILabelNode.toString()); - } - } - - log.debug("Adding CO-PI: "+ coPINode.getIndividualLabel()); - coPINode.addActivity(grant); - - Set coPIsForCurrentGrant; - - if (grantURLToCoPIs.containsKey(grant.getActivityURI())) { - coPIsForCurrentGrant = grantURLToCoPIs.get(grant.getActivityURI()); - } else { - coPIsForCurrentGrant = new HashSet(); - grantURLToCoPIs.put(grant.getActivityURI(), - coPIsForCurrentGrant); - } - - coPIsForCurrentGrant.add(coPINode); - log.debug("Co-PI for current grant : " + coPINode.getIndividualLabel()); - - Collaboration egoCoPIEdge = - getExistingEdge(egoNode, coPINode, edgeUniqueIdentifierToVO); + } + + log.debug("Adding CO-PI: "+ coPINode.getIndividualLabel()); + coPINode.addActivity(grant); + + Set coPIsForCurrentGrant; + + if (grantURLToCoPIs.containsKey(grant.getActivityURI())) { + coPIsForCurrentGrant = grantURLToCoPIs.get(grant.getActivityURI()); + } else { + coPIsForCurrentGrant = new HashSet(); + grantURLToCoPIs.put(grant.getActivityURI(), + coPIsForCurrentGrant); + } + + coPIsForCurrentGrant.add(coPINode); + log.debug("Co-PI for current grant : " + coPINode.getIndividualLabel()); + + Collaboration egoCoPIEdge = + getExistingEdge(egoNode, coPINode, edgeUniqueIdentifierToVO); /* - * If "egoCoPIEdge" is null it means that no edge exists in between the egoNode - * & current coPINode. Else create a new edge, add it to the edges set & add + * If "egoCoPIEdge" is null it means that no edge exists in between the egoNode + * & current coPINode. Else create a new edge, add it to the edges set & add * the collaborator grant to it. * */ - if (egoCoPIEdge != null) { - egoCoPIEdge.addActivity(grant); - } else { - egoCoPIEdge = new Collaboration(egoNode, coPINode, grant, edgeIDGenerator); - edges.add(egoCoPIEdge); - edgeUniqueIdentifierToVO.put( - getEdgeUniqueIdentifier(egoNode.getCollaboratorID(), - coPINode.getCollaboratorID()), - egoCoPIEdge); - } - + if (egoCoPIEdge != null) { + egoCoPIEdge.addActivity(grant); + } else { + egoCoPIEdge = new Collaboration(egoNode, coPINode, grant, edgeIDGenerator); + edges.add(egoCoPIEdge); + edgeUniqueIdentifierToVO.put( + getEdgeUniqueIdentifier(egoNode.getCollaboratorID(), + coPINode.getCollaboratorID()), + egoCoPIEdge); + } } - + + + @Override + protected void endProcessing() { + super.endProcessing(); /* - * This method takes out all the PIs & edges between PIs that belong to grants - * that have more than 100 PIs. We conjecture that these grants do not provide much + * This method takes out all the PIs & edges between PIs that belong to grants + * that have more than 100 PIs. We conjecture that these grants do not provide much * insight. However, we have left the grants be. - * This method side-effects "nodes" & "edges". + * This method side-effects "nodes" & "edges". * */ - removeLowQualityNodesAndEdges(nodes, - grantURLToVO, - grantURLToCoPIs, - edges); + removeLowQualityNodesAndEdges(nodes, + grantURLToVO, + grantURLToCoPIs, + edges); /* * We need to create edges between 2 co-PIs. E.g. On a grant there were 3 PI * ego, A & B then we have already created edges like, * ego - A * ego - B * The below sub-routine will take care of, - * A - B - * - * We are side-effecting "edges" here. The only reason to do this is because we are - * adding edges en masse for all the co-PIs on all the grants considered so far. The - * other reason being we dont want to compare against 2 sets of edges (edges created - * before & co-PI edges created during the course of this method) when we are creating + * A - B + * + * We are side-effecting "edges" here. The only reason to do this is because we are + * adding edges en masse for all the co-PIs on all the grants considered so far. The + * other reason being we dont want to compare against 2 sets of edges (edges created + * before & co-PI edges created during the course of this method) when we are creating * a new edge. * */ - createCoPIEdges(grantURLToVO, - grantURLToCoPIs, - edges, - edgeUniqueIdentifierToVO); - - after = System.currentTimeMillis(); - log.debug("Time taken to iterate through the ResultSet of SELECT queries is in ms: " - + (after - before)); - - return new CoInvestigationData(egoNode, nodes, edges); - } + createCoPIEdges(grantURLToVO, + grantURLToCoPIs, + edges, + edgeUniqueIdentifierToVO); + } + + + public CollaborationData getData() { + return new CoInvestigationData(egoNode, nodes, edges); + } + + private void createCoPIEdges(Map grantURLToVO, + Map> grantURLToCoPIs, Set edges, + Map edgeUniqueIdentifierToVO) { + + for (Map.Entry> currentGrantEntry + : grantURLToCoPIs.entrySet()) { - private void createCoPIEdges(Map grantURLToVO, - Map> grantURLToCoPIs, Set edges, - Map edgeUniqueIdentifierToVO) { - - for (Map.Entry> currentGrantEntry - : grantURLToCoPIs.entrySet()) { - /* - * If there was only one co-PI (other than ego) then we dont have to create any + * If there was only one co-PI (other than ego) then we dont have to create any * edges. so the below condition will take care of that. - * + * * We are restricting edges between co-PI if a particular grant has more than * 100 co-PIs. Our conjecture is that such edges do not provide any good insight * & causes unnecessary computations causing the server to time-out. * */ - if (currentGrantEntry.getValue().size() > 1 - && currentGrantEntry.getValue().size() + if (currentGrantEntry.getValue().size() > 1 + && currentGrantEntry.getValue().size() <= MAX_PI_PER_GRANT_ALLOWED) { - - Set newlyAddedEdges = new HashSet(); - + + Set newlyAddedEdges = new HashSet(); + /* - * In order to leverage the nested "for loop" for making edges between all the - * co-PIs we need to create a list out of the set first. + * In order to leverage the nested "for loop" for making edges between all the + * co-PIs we need to create a list out of the set first. * */ - List coPINodes = - new ArrayList(currentGrantEntry.getValue()); - Collections.sort(coPINodes, new CollaboratorComparator()); - - int numOfCoPIs = coPINodes.size(); - - for (int ii = 0; ii < numOfCoPIs - 1; ii++) { - for (int jj = ii + 1; jj < numOfCoPIs; jj++) { - - Collaborator coPI1 = coPINodes.get(ii); - Collaborator coPI2 = coPINodes.get(jj); - - Collaboration coPI1_2Edge = getExistingEdge(coPI1, - coPI2, - edgeUniqueIdentifierToVO); - - Activity currentGrant = grantURLToVO.get(currentGrantEntry.getKey()); - - if (coPI1_2Edge != null) { - coPI1_2Edge.addActivity(currentGrant); - } else { - coPI1_2Edge = new Collaboration(coPI1, - coPI2, - currentGrant, - edgeIDGenerator); - newlyAddedEdges.add(coPI1_2Edge); - edgeUniqueIdentifierToVO.put( - getEdgeUniqueIdentifier(coPI1.getCollaboratorID(), - coPI2.getCollaboratorID()), - coPI1_2Edge); + List coPINodes = + new ArrayList(currentGrantEntry.getValue()); + Collections.sort(coPINodes, new CollaboratorComparator()); + + int numOfCoPIs = coPINodes.size(); + + for (int ii = 0; ii < numOfCoPIs - 1; ii++) { + for (int jj = ii + 1; jj < numOfCoPIs; jj++) { + + Collaborator coPI1 = coPINodes.get(ii); + Collaborator coPI2 = coPINodes.get(jj); + + Collaboration coPI1_2Edge = getExistingEdge(coPI1, + coPI2, + edgeUniqueIdentifierToVO); + + Activity currentGrant = grantURLToVO.get(currentGrantEntry.getKey()); + + if (coPI1_2Edge != null) { + coPI1_2Edge.addActivity(currentGrant); + } else { + coPI1_2Edge = new Collaboration(coPI1, + coPI2, + currentGrant, + edgeIDGenerator); + newlyAddedEdges.add(coPI1_2Edge); + edgeUniqueIdentifierToVO.put( + getEdgeUniqueIdentifier(coPI1.getCollaboratorID(), + coPI2.getCollaboratorID()), + coPI1_2Edge); + } + } } + edges.addAll(newlyAddedEdges); } } - edges.addAll(newlyAddedEdges); } - } - } - private void removeLowQualityNodesAndEdges(Set nodes, - Map grantURLToVO, - Map> grantURLToCoPIs, Set edges) { - - Set nodesToBeRemoved = new HashSet(); - for (Map.Entry> currentGrantEntry + private void removeLowQualityNodesAndEdges(Set nodes, + Map grantURLToVO, + Map> grantURLToCoPIs, Set edges) { + + Set nodesToBeRemoved = new HashSet(); + for (Map.Entry> currentGrantEntry : grantURLToCoPIs.entrySet()) { - + if (currentGrantEntry.getValue().size() > MAX_PI_PER_GRANT_ALLOWED) { - + Activity currentGrant = grantURLToVO.get(currentGrantEntry.getKey()); - + Set edgesToBeRemoved = new HashSet(); - + for (Collaboration currentEdge : edges) { - Set currentCollaboratorGrants = - currentEdge.getCollaborationActivities(); - + Set currentCollaboratorGrants = + currentEdge.getCollaborationActivities(); + if (currentCollaboratorGrants.contains(currentGrant)) { currentCollaboratorGrants.remove(currentGrant); if (currentCollaboratorGrants.isEmpty()) { @@ -624,7 +581,7 @@ public class CoPIGrantCountQueryRunner implements QueryRunner } } } - + edges.removeAll(edgesToBeRemoved); for (Collaborator currentCoPI : currentGrantEntry.getValue()) { @@ -634,14 +591,14 @@ public class CoPIGrantCountQueryRunner implements QueryRunner } } } - } - nodes.removeAll(nodesToBeRemoved); - - } + } + nodes.removeAll(nodesToBeRemoved); - private Activity createGrantVO(QuerySolution solution, String grantURL) { - - Activity grant = new Activity(grantURL); + } + + private Activity createGrantVO(QuerySolution solution, String grantURL) { + + Activity grant = new Activity(grantURL); // RDFNode grantLabelNode = solution.get(QueryFieldLabels.GRANT_LABEL); // if (grantLabelNode != null) { @@ -649,17 +606,17 @@ public class CoPIGrantCountQueryRunner implements QueryRunner // } - RDFNode grantStartYear = solution.get(QueryFieldLabels.ROLE_START_DATE); - if (grantStartYear != null) { - grant.setActivityDate(grantStartYear.toString()); - } else { - grantStartYear = solution.get(QueryFieldLabels.GRANT_START_DATE); + RDFNode grantStartYear = solution.get(QueryFieldLabels.ROLE_START_DATE); if (grantStartYear != null) { grant.setActivityDate(grantStartYear.toString()); - } - } - - //TODO: Verify that grant end date is not required. + } else { + grantStartYear = solution.get(QueryFieldLabels.GRANT_START_DATE); + if (grantStartYear != null) { + grant.setActivityDate(grantStartYear.toString()); + } + } + + //TODO: Verify that grant end date is not required. /* RDFNode grantEndDate = solution.get(QueryFieldLabels.ROLE_END_DATE); if (grantEndDate != null) { @@ -668,10 +625,37 @@ public class CoPIGrantCountQueryRunner implements QueryRunner grantEndDate = solution.get(QueryFieldLabels.GRANT_END_DATE); if(grantEndDate != null){ grant.setGrantEndDate(grantEndDate.toString()); - } + } } */ - - return grant; + + return grant; + } + + private Collaboration getExistingEdge( + Collaborator collaboratingNode1, + Collaborator collaboratingNode2, + Map edgeUniqueIdentifierToVO) { + + String edgeUniqueIdentifier = getEdgeUniqueIdentifier( + collaboratingNode1.getCollaboratorID(), + collaboratingNode2.getCollaboratorID()); + + return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier); + + } + + private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) { + + String separator = "*"; + + if (nodeID1 < nodeID2) { + return nodeID1 + separator + nodeID2; + } else { + return nodeID2 + separator + nodeID1; + } + + } + /** END QUERY RESULT CONSUMER **/ } }