[VIVO-1031] Use streaming result set for co-visualisations

grahamtriggs 2015-10-19 15:51:57 +01:00
parent 208f103629
commit 8fc40fcf98
2 changed files with 457 additions and 485 deletions
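The change is the same in both files: instead of serialising the SELECT results to JSON, re-parsing them into an in-memory ResultSet and iterating over it, each query runner now hands the RDFService a ResultSetConsumer subclass that is called back once per solution as the results stream in. The sketch below is a minimal, hypothetical illustration of that pattern (StreamingSelectExample, CountingConsumer and countRows are invented names, and it assumes ResultSetConsumer needs only the two hooks the diff overrides); the real consumers below accumulate collaborator nodes and edges and post-process them in endProcessing().

import com.hp.hpl.jena.query.QuerySolution;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.ResultSetConsumer;

public class StreamingSelectExample {
    // Hypothetical consumer: accumulates state row by row instead of holding a whole ResultSet.
    private static class CountingConsumer extends ResultSetConsumer {
        private int rows = 0;

        @Override
        protected void processQuerySolution(QuerySolution qs) {
            rows++;    // called once for each solution as it streams in
        }

        @Override
        protected void endProcessing() {
            super.endProcessing();
            // called after the last solution; the place for any post-processing
        }

        public int getRows() {
            return rows;
        }
    }

    public static int countRows(RDFService rdfService, String sparqlSelect)
            throws RDFServiceException {
        CountingConsumer consumer = new CountingConsumer();
        // The service drives the consumer; there is no InputStream or ResultSet to manage or close.
        rdfService.sparqlSelectQuery(sparqlSelect, consumer);
        return consumer.getRows();
    }
}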

CoAuthorshipQueryRunner.java

@@ -16,6 +16,8 @@ import java.util.concurrent.ConcurrentHashMap;
import com.hp.hpl.jena.query.ResultSetFactory;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.ResultSetConsumer;
import net.sf.jga.algorithms.Unique;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.jena.iri.IRI;
@@ -23,7 +25,6 @@ import org.apache.jena.iri.IRIFactory;
import org.apache.jena.iri.Violation;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.RDFNode;
@@ -58,10 +59,6 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
private Log log;
private UniqueIDGenerator nodeIDGenerator;
private UniqueIDGenerator edgeIDGenerator;
public CoAuthorshipQueryRunner(String egoURI,
RDFService rdfService, Log log) {
@@ -69,18 +66,13 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
this.rdfService = rdfService;
this.log = log;
this.nodeIDGenerator = new UniqueIDGenerator();
this.edgeIDGenerator = new UniqueIDGenerator();
}
private CollaborationData createQueryResult(ResultSet resultSet) {
private static class QueryResultConsumer extends ResultSetConsumer {
Set<Collaborator> nodes = new HashSet<Collaborator>();
Map<String, Activity> biboDocumentURLToVO = new HashMap<String, Activity>();
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors =
new HashMap<String, Set<Collaborator>>();
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors = new HashMap<String, Set<Collaborator>>();
Map<String, Collaborator> nodeURLToVO = new HashMap<String, Collaborator>();
Map<String, Collaboration> edgeUniqueIdentifierToVO = new HashMap<String, Collaboration>();
@@ -88,13 +80,15 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
Set<Collaboration> edges = new HashSet<Collaboration>();
while (resultSet.hasNext()) {
QuerySolution solution = resultSet.nextSolution();
private UniqueIDGenerator nodeIDGenerator = new UniqueIDGenerator();
private UniqueIDGenerator edgeIDGenerator = new UniqueIDGenerator();
@Override
protected void processQuerySolution(QuerySolution qs) {
/*
* We want to create only ONE ego node.
* */
RDFNode egoAuthorURLNode = solution.get(QueryFieldLabels.AUTHOR_URL);
RDFNode egoAuthorURLNode = qs.get(QueryFieldLabels.AUTHOR_URL);
if (nodeURLToVO.containsKey(egoAuthorURLNode.toString())) {
egoNode = nodeURLToVO.get(egoAuthorURLNode.toString());
@@ -105,19 +99,19 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
nodes.add(egoNode);
nodeURLToVO.put(egoAuthorURLNode.toString(), egoNode);
RDFNode authorLabelNode = solution.get(QueryFieldLabels.AUTHOR_LABEL);
RDFNode authorLabelNode = qs.get(QueryFieldLabels.AUTHOR_LABEL);
if (authorLabelNode != null) {
egoNode.setCollaboratorName(authorLabelNode.toString());
}
}
RDFNode documentNode = solution.get(QueryFieldLabels.DOCUMENT_URL);
RDFNode documentNode = qs.get(QueryFieldLabels.DOCUMENT_URL);
Activity biboDocument;
if (biboDocumentURLToVO.containsKey(documentNode.toString())) {
biboDocument = biboDocumentURLToVO.get(documentNode.toString());
} else {
biboDocument = createDocumentVO(solution, documentNode.toString());
biboDocument = createDocumentVO(qs, documentNode.toString());
biboDocumentURLToVO.put(documentNode.toString(), biboDocument);
}
@@ -128,14 +122,14 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
* we do not want a co-author node or Collaboration if the publication has only one
* author and that happens to be the ego.
* */
if (solution.get(QueryFieldLabels.AUTHOR_URL).toString().equalsIgnoreCase(
solution.get(QueryFieldLabels.CO_AUTHOR_URL).toString())) {
continue;
if (qs.get(QueryFieldLabels.AUTHOR_URL).toString().equalsIgnoreCase(
qs.get(QueryFieldLabels.CO_AUTHOR_URL).toString())) {
return;
}
Collaborator coAuthorNode;
RDFNode coAuthorURLNode = solution.get(QueryFieldLabels.CO_AUTHOR_URL);
RDFNode coAuthorURLNode = qs.get(QueryFieldLabels.CO_AUTHOR_URL);
if (nodeURLToVO.containsKey(coAuthorURLNode.toString())) {
coAuthorNode = nodeURLToVO.get(coAuthorURLNode.toString());
@@ -146,7 +140,7 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
nodes.add(coAuthorNode);
nodeURLToVO.put(coAuthorURLNode.toString(), coAuthorNode);
RDFNode coAuthorLabelNode = solution.get(QueryFieldLabels.CO_AUTHOR_LABEL);
RDFNode coAuthorLabelNode = qs.get(QueryFieldLabels.CO_AUTHOR_LABEL);
if (coAuthorLabelNode != null) {
coAuthorNode.setCollaboratorName(coAuthorLabelNode.toString());
}
@@ -158,11 +152,11 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
if (biboDocumentURLToCoAuthors.containsKey(biboDocument.getActivityURI())) {
coAuthorsForCurrentBiboDocument = biboDocumentURLToCoAuthors
.get(biboDocument.getActivityURI());
.get(biboDocument.getActivityURI());
} else {
coAuthorsForCurrentBiboDocument = new HashSet<Collaborator>();
biboDocumentURLToCoAuthors.put(biboDocument.getActivityURI(),
coAuthorsForCurrentBiboDocument);
coAuthorsForCurrentBiboDocument);
}
coAuthorsForCurrentBiboDocument.add(coAuthorNode);
@@ -183,70 +177,173 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
edges.add(egoCoAuthorEdge);
edgeUniqueIdentifierToVO.put(
getEdgeUniqueIdentifier(egoNode.getCollaboratorID(),
coAuthorNode.getCollaboratorID()),
coAuthorNode.getCollaboratorID()),
egoCoAuthorEdge);
}
}
@Override
protected void endProcessing() {
/*
* This method takes out all the authors & edges between authors that belong to documents
* that have more than 100 authors. We conjecture that these papers do not provide much
* insight. However, we have left the documents be.
*
* This method side-effects "nodes" & "edges".
* */
removeLowQualityNodesAndEdges(nodes,
biboDocumentURLToVO,
biboDocumentURLToCoAuthors,
edges);
/*
* We need to create edges between 2 co-authors. E.g. On a paper there were 3 authors
* ego, A & B then we have already created edges like,
* ego - A
* ego - B
* The below sub-routine will take care of,
* A - B
*
* We are side-effecting "edges" here. One reason to do this is that we are adding
* edges en masse for all the co-authors on all the publications considered so far. The
* other reason is that we don't want to compare against 2 sets of edges (edges created before
* & co-author edges created during the course of this method) when we are creating a new
* Collaboration.
* */
createCoAuthorEdges(biboDocumentURLToVO,
biboDocumentURLToCoAuthors,
edges,
edgeUniqueIdentifierToVO);
}
public CollaborationData getCollaborationData() {
return new CoAuthorshipData(egoNode, nodes, edges);
}
private Activity createDocumentVO(QuerySolution solution, String documentURL) {
Activity biboDocument = new Activity(documentURL);
RDFNode publicationDateNode = solution.get(QueryFieldLabels.DOCUMENT_PUBLICATION_DATE);
if (publicationDateNode != null) {
biboDocument.setActivityDate(publicationDateNode.toString());
}
return biboDocument;
}
private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
String separator = "*";
if (nodeID1 < nodeID2) {
return nodeID1 + separator + nodeID2;
} else {
return nodeID2 + separator + nodeID1;
}
}
private void createCoAuthorEdges(
Map<String, Activity> biboDocumentURLToVO,
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors, Set<Collaboration> edges,
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
for (Map.Entry<String, Set<Collaborator>> currentBiboDocumentEntry
: biboDocumentURLToCoAuthors.entrySet()) {
/*
* If there was only one co-author (other than ego) then we don't have to create any
* edges, so the below condition will take care of that.
*
* We are restricting edges between co-authors if a particular document has more than
* 100 co-authors. Our conjecture is that such edges do not provide any good insight
* & cause unnecessary computations, causing the server to time out.
* */
if (currentBiboDocumentEntry.getValue().size() > 1
&& currentBiboDocumentEntry.getValue().size()
<= MAX_AUTHORS_PER_PAPER_ALLOWED) {
/*
* This method takes out all the authors & edges between authors that belong to documents
* that have more than 100 authors. We conjecture that these papers do not provide much
* insight. However, we have left the documents be.
*
* This method side-effects "nodes" & "edges".
* */
removeLowQualityNodesAndEdges(nodes,
biboDocumentURLToVO,
biboDocumentURLToCoAuthors,
edges);
Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
/*
* We need to create edges between 2 co-authors. E.g. On a paper there were 3 authors
* ego, A & B then we have already created edges like,
* ego - A
* ego - B
* The below sub-routine will take care of,
* A - B
*
* We are side-effecting "edges" here. One reason to do this is that we are adding
* edges en masse for all the co-authors on all the publications considered so far. The
* other reason is that we don't want to compare against 2 sets of edges (edges created before
* & co-author edges created during the course of this method) when we are creating a new
* Collaboration.
* */
createCoAuthorEdges(biboDocumentURLToVO,
biboDocumentURLToCoAuthors,
edges,
edgeUniqueIdentifierToVO);
/*
* In order to leverage the nested "for loop" for making edges between all the
* co-authors we need to create a list out of the set first.
* */
List<Collaborator> coAuthorNodes =
new ArrayList<Collaborator>(currentBiboDocumentEntry.getValue());
Collections.sort(coAuthorNodes, new CollaboratorComparator());
return new CoAuthorshipData(egoNode, nodes, edges);
}
int numOfCoAuthors = coAuthorNodes.size();
private void removeLowQualityNodesAndEdges(
Set<Collaborator> nodes,
Map<String, Activity> biboDocumentURLToVO,
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors,
Set<Collaboration> edges) {
for (int ii = 0; ii < numOfCoAuthors - 1; ii++) {
for (int jj = ii + 1; jj < numOfCoAuthors; jj++) {
Set<Collaborator> nodesToBeRemoved = new HashSet<Collaborator>();
for (Map.Entry<String, Set<Collaborator>> currentBiboDocumentEntry
Collaborator coAuthor1 = coAuthorNodes.get(ii);
Collaborator coAuthor2 = coAuthorNodes.get(jj);
Collaboration coAuthor1_2Edge = getExistingEdge(coAuthor1,
coAuthor2,
edgeUniqueIdentifierToVO);
Activity currentBiboDocument = biboDocumentURLToVO
.get(currentBiboDocumentEntry
.getKey());
if (coAuthor1_2Edge != null) {
coAuthor1_2Edge.addActivity(currentBiboDocument);
} else {
coAuthor1_2Edge = new Collaboration(coAuthor1,
coAuthor2,
currentBiboDocument,
edgeIDGenerator);
newlyAddedEdges.add(coAuthor1_2Edge);
edgeUniqueIdentifierToVO.put(
getEdgeUniqueIdentifier(coAuthor1.getCollaboratorID(),
coAuthor2.getCollaboratorID()),
coAuthor1_2Edge);
}
}
}
edges.addAll(newlyAddedEdges);
}
}
}
private Collaboration getExistingEdge(
Collaborator collaboratingNode1,
Collaborator collaboratingNode2,
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
String edgeUniqueIdentifier = getEdgeUniqueIdentifier(
collaboratingNode1.getCollaboratorID(),
collaboratingNode2.getCollaboratorID());
return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier);
}
private void removeLowQualityNodesAndEdges(
Set<Collaborator> nodes,
Map<String, Activity> biboDocumentURLToVO,
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors,
Set<Collaboration> edges) {
Set<Collaborator> nodesToBeRemoved = new HashSet<Collaborator>();
for (Map.Entry<String, Set<Collaborator>> currentBiboDocumentEntry
: biboDocumentURLToCoAuthors.entrySet()) {
if (currentBiboDocumentEntry.getValue().size() > MAX_AUTHORS_PER_PAPER_ALLOWED) {
Activity currentBiboDocument = biboDocumentURLToVO
.get(currentBiboDocumentEntry.getKey());
.get(currentBiboDocumentEntry.getKey());
Set<Collaboration> edgesToBeRemoved = new HashSet<Collaboration>();
for (Collaboration currentEdge : edges) {
Set<Activity> currentCollaboratorDocuments =
currentEdge.getCollaborationActivities();
currentEdge.getCollaborationActivities();
if (currentCollaboratorDocuments.contains(currentBiboDocument)) {
currentCollaboratorDocuments.remove(currentBiboDocument);
@@ -265,114 +362,11 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
}
}
}
}
nodes.removeAll(nodesToBeRemoved);
}
private void createCoAuthorEdges(
Map<String, Activity> biboDocumentURLToVO,
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors, Set<Collaboration> edges,
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
for (Map.Entry<String, Set<Collaborator>> currentBiboDocumentEntry
: biboDocumentURLToCoAuthors.entrySet()) {
/*
* If there was only one co-author (other than ego) then we don't have to create any
* edges, so the below condition will take care of that.
*
* We are restricting edges between co-authors if a particular document has more than
* 100 co-authors. Our conjecture is that such edges do not provide any good insight
* & cause unnecessary computations, causing the server to time out.
* */
if (currentBiboDocumentEntry.getValue().size() > 1
&& currentBiboDocumentEntry.getValue().size()
<= MAX_AUTHORS_PER_PAPER_ALLOWED) {
Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
/*
* In order to leverage the nested "for loop" for making edges between all the
* co-authors we need to create a list out of the set first.
* */
List<Collaborator> coAuthorNodes =
new ArrayList<Collaborator>(currentBiboDocumentEntry.getValue());
Collections.sort(coAuthorNodes, new CollaboratorComparator());
int numOfCoAuthors = coAuthorNodes.size();
for (int ii = 0; ii < numOfCoAuthors - 1; ii++) {
for (int jj = ii + 1; jj < numOfCoAuthors; jj++) {
Collaborator coAuthor1 = coAuthorNodes.get(ii);
Collaborator coAuthor2 = coAuthorNodes.get(jj);
Collaboration coAuthor1_2Edge = getExistingEdge(coAuthor1,
coAuthor2,
edgeUniqueIdentifierToVO);
Activity currentBiboDocument = biboDocumentURLToVO
.get(currentBiboDocumentEntry
.getKey());
if (coAuthor1_2Edge != null) {
coAuthor1_2Edge.addActivity(currentBiboDocument);
} else {
coAuthor1_2Edge = new Collaboration(coAuthor1,
coAuthor2,
currentBiboDocument,
edgeIDGenerator);
newlyAddedEdges.add(coAuthor1_2Edge);
edgeUniqueIdentifierToVO.put(
getEdgeUniqueIdentifier(coAuthor1.getCollaboratorID(),
coAuthor2.getCollaboratorID()),
coAuthor1_2Edge);
}
}
}
edges.addAll(newlyAddedEdges);
}
}
}
private Collaboration getExistingEdge(
Collaborator collaboratingNode1,
Collaborator collaboratingNode2,
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
String edgeUniqueIdentifier = getEdgeUniqueIdentifier(
collaboratingNode1.getCollaboratorID(),
collaboratingNode2.getCollaboratorID());
return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier);
}
private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
String separator = "*";
if (nodeID1 < nodeID2) {
return nodeID1 + separator + nodeID2;
} else {
return nodeID2 + separator + nodeID1;
nodes.removeAll(nodesToBeRemoved);
}
}
private Activity createDocumentVO(QuerySolution solution, String documentURL) {
Activity biboDocument = new Activity(documentURL);
RDFNode publicationDateNode = solution.get(QueryFieldLabels.DOCUMENT_PUBLICATION_DATE);
if (publicationDateNode != null) {
biboDocument.setActivityDate(publicationDateNode.toString());
}
return biboDocument;
/* END QUERY RUNNER */
}
private String generateEgoCoAuthorshipSparqlQuery(String queryURI) {
@@ -455,19 +449,13 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
throw new MalformedQueryParametersException("URI parameter is either null or empty.");
}
InputStream is = null;
ResultSet rs = null;
try {
is = rdfService.sparqlSelectQuery(generateEgoCoAuthorshipSparqlQuery(this.egoURI), RDFService.ResultFormat.JSON);
rs = ResultSetFactory.fromJSON(is);
data = createQueryResult(rs);
QueryResultConsumer consumer = new QueryResultConsumer();
rdfService.sparqlSelectQuery(generateEgoCoAuthorshipSparqlQuery(this.egoURI), consumer);
data = consumer.getCollaborationData();
} catch (RDFServiceException e) {
log.error("Unable to execute query", e);
throw new RuntimeException(e);
} finally {
if (is != null) {
try { is.close(); } catch (Throwable t) { }
}
}
CollaborationDataCacheEntry newEntry = new CollaborationDataCacheEntry();

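For reference, the two call paths interleaved in the hunk above, pulled apart into a hypothetical helper class (SelectQueryPaths, countBuffered and selectStreaming are invented names; only the RDFService and Jena calls shown in the diff are assumed to exist):

import java.io.InputStream;

import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.ResultSetFactory;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.ResultSetConsumer;

public class SelectQueryPaths {
    // Old path (removed above): buffer the whole response as JSON, re-parse it, then iterate.
    public static int countBuffered(RDFService rdfService, String query)
            throws RDFServiceException {
        InputStream is = null;
        int rows = 0;
        try {
            is = rdfService.sparqlSelectQuery(query, RDFService.ResultFormat.JSON);
            ResultSet rs = ResultSetFactory.fromJSON(is);
            while (rs.hasNext()) {
                rs.nextSolution();
                rows++;
            }
        } finally {
            if (is != null) {
                try { is.close(); } catch (Throwable t) { /* ignore */ }
            }
        }
        return rows;
    }

    // New path (added above): hand the service a consumer and let it stream each solution into it.
    public static void selectStreaming(RDFService rdfService, String query,
            ResultSetConsumer consumer) throws RDFServiceException {
        rdfService.sparqlSelectQuery(query, consumer);
    }
}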
CoPIGrantCountQueryRunner.java

@@ -2,7 +2,6 @@
package edu.cornell.mannlib.vitro.webapp.visualization.coprincipalinvestigator;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
@@ -13,9 +12,9 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import com.hp.hpl.jena.query.ResultSetFactory;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.ResultSetConsumer;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -23,14 +22,8 @@ import org.apache.jena.iri.IRI;
import org.apache.jena.iri.IRIFactory;
import org.apache.jena.iri.Violation;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.RDFNode;
import edu.cornell.mannlib.vitro.webapp.visualization.collaborationutils.CoInvestigationData;
@@ -60,10 +53,6 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
private Log log = LogFactory.getLog(CoPIGrantCountQueryRunner.class.getName());
private UniqueIDGenerator nodeIDGenerator;
private UniqueIDGenerator edgeIDGenerator;
private long before, after;
private static final String SPARQL_QUERY_COMMON_OPTIONAL_BLOCK_FOR_ROLE_DATE_TIME = ""
@@ -95,9 +84,6 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
this.egoURI = egoURI;
this.rdfService = rdfService;
// this.log = log;
this.nodeIDGenerator = new UniqueIDGenerator();
this.edgeIDGenerator = new UniqueIDGenerator();
}
private String generateEgoCoPIquery(String queryURI) {
@@ -294,19 +280,13 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
before = System.currentTimeMillis();
InputStream is = null;
ResultSet rs = null;
try {
is = rdfService.sparqlSelectQuery(generateEgoCoPIquery(this.egoURI), RDFService.ResultFormat.JSON);
rs = ResultSetFactory.fromJSON(is);
data = createQueryResult(rs);
QueryResultConsumer consumer = new QueryResultConsumer();
rdfService.sparqlSelectQuery(generateEgoCoPIquery(this.egoURI), consumer);
data = consumer.getData();
} catch (RDFServiceException e) {
log.error("Unable to execute query", e);
throw new RuntimeException(e);
} finally {
if (is != null) {
try { is.close(); } catch (Throwable t) { }
}
}
after = System.currentTimeMillis();
@@ -356,34 +336,7 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
}
}
private Collaboration getExistingEdge(
Collaborator collaboratingNode1,
Collaborator collaboratingNode2,
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
String edgeUniqueIdentifier = getEdgeUniqueIdentifier(
collaboratingNode1.getCollaboratorID(),
collaboratingNode2.getCollaboratorID());
return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier);
}
private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
String separator = "*";
if (nodeID1 < nodeID2) {
return nodeID1 + separator + nodeID2;
} else {
return nodeID2 + separator + nodeID1;
}
}
private CollaborationData createQueryResult(ResultSet resultSet) {
private static class QueryResultConsumer extends ResultSetConsumer {
Set<Collaborator> nodes = new HashSet<Collaborator>();
Map<String, Activity> grantURLToVO = new HashMap<String, Activity>();
@@ -395,111 +348,116 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
Set<Collaboration> edges = new HashSet<Collaboration>();
before = System.currentTimeMillis();
private UniqueIDGenerator nodeIDGenerator = new UniqueIDGenerator();
private UniqueIDGenerator edgeIDGenerator = new UniqueIDGenerator();
while (resultSet.hasNext()) {
QuerySolution solution = resultSet.nextSolution();
private Log log = LogFactory.getLog(CoPIGrantCountQueryRunner.class.getName());
@Override
protected void processQuerySolution(QuerySolution qs) {
/*
* We want to create only ONE ego node.
* */
RDFNode egoPIURLNode = solution.get(QueryFieldLabels.PI_URL);
if (nodeURLToVO.containsKey(egoPIURLNode.toString())) {
RDFNode egoPIURLNode = qs.get(QueryFieldLabels.PI_URL);
if (nodeURLToVO.containsKey(egoPIURLNode.toString())) {
egoNode = nodeURLToVO.get(egoPIURLNode.toString());
egoNode = nodeURLToVO.get(egoPIURLNode.toString());
} else {
} else {
egoNode = new Collaborator(egoPIURLNode.toString(), nodeIDGenerator);
nodes.add(egoNode);
nodeURLToVO.put(egoPIURLNode.toString(), egoNode);
egoNode = new Collaborator(egoPIURLNode.toString(), nodeIDGenerator);
nodes.add(egoNode);
nodeURLToVO.put(egoPIURLNode.toString(), egoNode);
RDFNode authorLabelNode = solution.get(QueryFieldLabels.PI_LABEL);
if (authorLabelNode != null) {
egoNode.setCollaboratorName(authorLabelNode.toString());
}
RDFNode authorLabelNode = qs.get(QueryFieldLabels.PI_LABEL);
if (authorLabelNode != null) {
egoNode.setCollaboratorName(authorLabelNode.toString());
}
log.debug("PI: " + egoNode.getIndividualLabel());
}
log.debug("PI: " + egoNode.getIndividualLabel());
RDFNode grantNode = solution.get(QueryFieldLabels.GRANT_URL);
Activity grant;
RDFNode grantNode = qs.get(QueryFieldLabels.GRANT_URL);
Activity grant;
if (grantURLToVO.containsKey(grantNode.toString())) {
grant = grantURLToVO.get(grantNode.toString());
} else {
grant = createGrantVO(solution, grantNode.toString());
grantURLToVO.put(grantNode.toString(), grant);
}
if (grantURLToVO.containsKey(grantNode.toString())) {
grant = grantURLToVO.get(grantNode.toString());
} else {
grant = createGrantVO(qs, grantNode.toString());
grantURLToVO.put(grantNode.toString(), grant);
}
egoNode.addActivity(grant);
log.debug("Adding grant: " + grant.getIndividualLabel());
egoNode.addActivity(grant);
log.debug("Adding grant: " + grant.getIndividualLabel());
/*
* After some discussion we concluded that for the purpose of this visualization
* we do not want a co-pi node or edge if the grant has only one
* pi and that happens to be the ego.
* */
if (solution.get(QueryFieldLabels.PI_URL).toString().equalsIgnoreCase(
solution.get(QueryFieldLabels.CO_PI_URL).toString())) {
continue;
if (qs.get(QueryFieldLabels.PI_URL).toString().equalsIgnoreCase(
qs.get(QueryFieldLabels.CO_PI_URL).toString())) {
return;
}
Collaborator coPINode;
RDFNode coPIURLNode = qs.get(QueryFieldLabels.CO_PI_URL);
if (nodeURLToVO.containsKey(coPIURLNode.toString())) {
coPINode = nodeURLToVO.get(coPIURLNode.toString());
} else {
coPINode = new Collaborator(coPIURLNode.toString(), nodeIDGenerator);
nodes.add(coPINode);
nodeURLToVO.put(coPIURLNode.toString(), coPINode);
RDFNode coPILabelNode = qs.get(QueryFieldLabels.CO_PI_LABEL);
if (coPILabelNode != null) {
coPINode.setCollaboratorName(coPILabelNode.toString());
}
}
Collaborator coPINode;
log.debug("Adding CO-PI: "+ coPINode.getIndividualLabel());
coPINode.addActivity(grant);
RDFNode coPIURLNode = solution.get(QueryFieldLabels.CO_PI_URL);
if (nodeURLToVO.containsKey(coPIURLNode.toString())) {
Set<Collaborator> coPIsForCurrentGrant;
coPINode = nodeURLToVO.get(coPIURLNode.toString());
if (grantURLToCoPIs.containsKey(grant.getActivityURI())) {
coPIsForCurrentGrant = grantURLToCoPIs.get(grant.getActivityURI());
} else {
coPIsForCurrentGrant = new HashSet<Collaborator>();
grantURLToCoPIs.put(grant.getActivityURI(),
coPIsForCurrentGrant);
}
} else {
coPIsForCurrentGrant.add(coPINode);
log.debug("Co-PI for current grant : " + coPINode.getIndividualLabel());
coPINode = new Collaborator(coPIURLNode.toString(), nodeIDGenerator);
nodes.add(coPINode);
nodeURLToVO.put(coPIURLNode.toString(), coPINode);
RDFNode coPILabelNode = solution.get(QueryFieldLabels.CO_PI_LABEL);
if (coPILabelNode != null) {
coPINode.setCollaboratorName(coPILabelNode.toString());
}
}
log.debug("Adding CO-PI: "+ coPINode.getIndividualLabel());
coPINode.addActivity(grant);
Set<Collaborator> coPIsForCurrentGrant;
if (grantURLToCoPIs.containsKey(grant.getActivityURI())) {
coPIsForCurrentGrant = grantURLToCoPIs.get(grant.getActivityURI());
} else {
coPIsForCurrentGrant = new HashSet<Collaborator>();
grantURLToCoPIs.put(grant.getActivityURI(),
coPIsForCurrentGrant);
}
coPIsForCurrentGrant.add(coPINode);
log.debug("Co-PI for current grant : " + coPINode.getIndividualLabel());
Collaboration egoCoPIEdge =
getExistingEdge(egoNode, coPINode, edgeUniqueIdentifierToVO);
Collaboration egoCoPIEdge =
getExistingEdge(egoNode, coPINode, edgeUniqueIdentifierToVO);
/*
* If "egoCoPIEdge" is null it means that no edge exists in between the egoNode
* & current coPINode. Else create a new edge, add it to the edges set & add
* the collaborator grant to it.
* */
if (egoCoPIEdge != null) {
egoCoPIEdge.addActivity(grant);
} else {
egoCoPIEdge = new Collaboration(egoNode, coPINode, grant, edgeIDGenerator);
edges.add(egoCoPIEdge);
edgeUniqueIdentifierToVO.put(
getEdgeUniqueIdentifier(egoNode.getCollaboratorID(),
coPINode.getCollaboratorID()),
egoCoPIEdge);
}
if (egoCoPIEdge != null) {
egoCoPIEdge.addActivity(grant);
} else {
egoCoPIEdge = new Collaboration(egoNode, coPINode, grant, edgeIDGenerator);
edges.add(egoCoPIEdge);
edgeUniqueIdentifierToVO.put(
getEdgeUniqueIdentifier(egoNode.getCollaboratorID(),
coPINode.getCollaboratorID()),
egoCoPIEdge);
}
}
@Override
protected void endProcessing() {
super.endProcessing();
/*
* This method takes out all the PIs & edges between PIs that belong to grants
* that have more than 100 PIs. We conjecture that these grants do not provide much
@@ -507,9 +465,9 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
* This method side-effects "nodes" & "edges".
* */
removeLowQualityNodesAndEdges(nodes,
grantURLToVO,
grantURLToCoPIs,
edges);
grantURLToVO,
grantURLToCoPIs,
edges);
/*
* We need to create edges between 2 co-PIs. E.g. On a grant there were 3 PI
* ego, A & B then we have already created edges like,
@@ -525,23 +483,22 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
* a new edge.
* */
createCoPIEdges(grantURLToVO,
grantURLToCoPIs,
edges,
edgeUniqueIdentifierToVO);
grantURLToCoPIs,
edges,
edgeUniqueIdentifierToVO);
}
after = System.currentTimeMillis();
log.debug("Time taken to iterate through the ResultSet of SELECT queries is in ms: "
+ (after - before));
public CollaborationData getData() {
return new CoInvestigationData(egoNode, nodes, edges);
}
}
private void createCoPIEdges(Map<String, Activity> grantURLToVO,
Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges,
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
private void createCoPIEdges(Map<String, Activity> grantURLToVO,
Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges,
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
: grantURLToCoPIs.entrySet()) {
for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
: grantURLToCoPIs.entrySet()) {
/*
* If there was only one co-PI (other than ego) then we don't have to create any
@@ -551,60 +508,60 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
* 100 co-PIs. Our conjecture is that such edges do not provide any good insight
* & cause unnecessary computations, causing the server to time out.
* */
if (currentGrantEntry.getValue().size() > 1
&& currentGrantEntry.getValue().size()
if (currentGrantEntry.getValue().size() > 1
&& currentGrantEntry.getValue().size()
<= MAX_PI_PER_GRANT_ALLOWED) {
Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
/*
* In order to leverage the nested "for loop" for making edges between all the
* co-PIs we need to create a list out of the set first.
* */
List<Collaborator> coPINodes =
new ArrayList<Collaborator>(currentGrantEntry.getValue());
Collections.sort(coPINodes, new CollaboratorComparator());
List<Collaborator> coPINodes =
new ArrayList<Collaborator>(currentGrantEntry.getValue());
Collections.sort(coPINodes, new CollaboratorComparator());
int numOfCoPIs = coPINodes.size();
int numOfCoPIs = coPINodes.size();
for (int ii = 0; ii < numOfCoPIs - 1; ii++) {
for (int jj = ii + 1; jj < numOfCoPIs; jj++) {
for (int ii = 0; ii < numOfCoPIs - 1; ii++) {
for (int jj = ii + 1; jj < numOfCoPIs; jj++) {
Collaborator coPI1 = coPINodes.get(ii);
Collaborator coPI2 = coPINodes.get(jj);
Collaborator coPI1 = coPINodes.get(ii);
Collaborator coPI2 = coPINodes.get(jj);
Collaboration coPI1_2Edge = getExistingEdge(coPI1,
coPI2,
edgeUniqueIdentifierToVO);
Collaboration coPI1_2Edge = getExistingEdge(coPI1,
coPI2,
edgeUniqueIdentifierToVO);
Activity currentGrant = grantURLToVO.get(currentGrantEntry.getKey());
Activity currentGrant = grantURLToVO.get(currentGrantEntry.getKey());
if (coPI1_2Edge != null) {
coPI1_2Edge.addActivity(currentGrant);
} else {
coPI1_2Edge = new Collaboration(coPI1,
coPI2,
currentGrant,
edgeIDGenerator);
newlyAddedEdges.add(coPI1_2Edge);
edgeUniqueIdentifierToVO.put(
getEdgeUniqueIdentifier(coPI1.getCollaboratorID(),
coPI2.getCollaboratorID()),
coPI1_2Edge);
if (coPI1_2Edge != null) {
coPI1_2Edge.addActivity(currentGrant);
} else {
coPI1_2Edge = new Collaboration(coPI1,
coPI2,
currentGrant,
edgeIDGenerator);
newlyAddedEdges.add(coPI1_2Edge);
edgeUniqueIdentifierToVO.put(
getEdgeUniqueIdentifier(coPI1.getCollaboratorID(),
coPI2.getCollaboratorID()),
coPI1_2Edge);
}
}
}
edges.addAll(newlyAddedEdges);
}
}
edges.addAll(newlyAddedEdges);
}
}
}
private void removeLowQualityNodesAndEdges(Set<Collaborator> nodes,
Map<String, Activity> grantURLToVO,
Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges) {
private void removeLowQualityNodesAndEdges(Set<Collaborator> nodes,
Map<String, Activity> grantURLToVO,
Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges) {
Set<Collaborator> nodesToBeRemoved = new HashSet<Collaborator>();
for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
Set<Collaborator> nodesToBeRemoved = new HashSet<Collaborator>();
for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
: grantURLToCoPIs.entrySet()) {
if (currentGrantEntry.getValue().size() > MAX_PI_PER_GRANT_ALLOWED) {
@@ -615,7 +572,7 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
for (Collaboration currentEdge : edges) {
Set<Activity> currentCollaboratorGrants =
currentEdge.getCollaborationActivities();
currentEdge.getCollaborationActivities();
if (currentCollaboratorGrants.contains(currentGrant)) {
currentCollaboratorGrants.remove(currentGrant);
@@ -634,14 +591,14 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
}
}
}
}
nodes.removeAll(nodesToBeRemoved);
}
nodes.removeAll(nodesToBeRemoved);
}
private Activity createGrantVO(QuerySolution solution, String grantURL) {
private Activity createGrantVO(QuerySolution solution, String grantURL) {
Activity grant = new Activity(grantURL);
Activity grant = new Activity(grantURL);
// RDFNode grantLabelNode = solution.get(QueryFieldLabels.GRANT_LABEL);
// if (grantLabelNode != null) {
@@ -649,17 +606,17 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
// }
RDFNode grantStartYear = solution.get(QueryFieldLabels.ROLE_START_DATE);
if (grantStartYear != null) {
grant.setActivityDate(grantStartYear.toString());
} else {
grantStartYear = solution.get(QueryFieldLabels.GRANT_START_DATE);
RDFNode grantStartYear = solution.get(QueryFieldLabels.ROLE_START_DATE);
if (grantStartYear != null) {
grant.setActivityDate(grantStartYear.toString());
} else {
grantStartYear = solution.get(QueryFieldLabels.GRANT_START_DATE);
if (grantStartYear != null) {
grant.setActivityDate(grantStartYear.toString());
}
}
}
//TODO: Verify that grant end date is not required.
//TODO: Verify that grant end date is not required.
/*
RDFNode grantEndDate = solution.get(QueryFieldLabels.ROLE_END_DATE);
if (grantEndDate != null) {
@@ -672,6 +629,33 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
}
*/
return grant;
return grant;
}
private Collaboration getExistingEdge(
Collaborator collaboratingNode1,
Collaborator collaboratingNode2,
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
String edgeUniqueIdentifier = getEdgeUniqueIdentifier(
collaboratingNode1.getCollaboratorID(),
collaboratingNode2.getCollaboratorID());
return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier);
}
private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
String separator = "*";
if (nodeID1 < nodeID2) {
return nodeID1 + separator + nodeID2;
} else {
return nodeID2 + separator + nodeID1;
}
}
/** END QUERY RESULT CONSUMER **/
}
}
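Both runners keep an identical pair of private helpers, getExistingEdge and getEdgeUniqueIdentifier, for deduplicating Collaboration edges. The key is order-independent because the smaller collaborator ID is always written first, so the pairs (3, 7) and (7, 3) both produce the key "3*7". A standalone restatement for illustration (EdgeKeyExample and edgeKey are invented names):

import java.util.HashMap;
import java.util.Map;

public class EdgeKeyExample {
    // Same logic as getEdgeUniqueIdentifier in both query runners.
    static String edgeKey(int nodeID1, int nodeID2) {
        String separator = "*";
        // Put the smaller ID first so the key does not depend on argument order.
        if (nodeID1 < nodeID2) {
            return nodeID1 + separator + nodeID2;
        } else {
            return nodeID2 + separator + nodeID1;
        }
    }

    public static void main(String[] args) {
        Map<String, String> edges = new HashMap<String, String>();
        edges.put(edgeKey(3, 7), "collaboration between collaborators 3 and 7");
        // Looking the pair up in either order hits the same entry, under key "3*7".
        System.out.println(edges.get(edgeKey(7, 3)));
    }
}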