[VIVO-1031] Use streaming result set for co- visualisations
This commit is contained in:
parent
208f103629
commit
8fc40fcf98
2 changed files with 457 additions and 485 deletions
|
@ -16,6 +16,8 @@ import java.util.concurrent.ConcurrentHashMap;
|
||||||
import com.hp.hpl.jena.query.ResultSetFactory;
|
import com.hp.hpl.jena.query.ResultSetFactory;
|
||||||
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
|
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
|
||||||
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
|
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.rdfservice.ResultSetConsumer;
|
||||||
|
import net.sf.jga.algorithms.Unique;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.jena.iri.IRI;
|
import org.apache.jena.iri.IRI;
|
||||||
|
@ -23,7 +25,6 @@ import org.apache.jena.iri.IRIFactory;
|
||||||
import org.apache.jena.iri.Violation;
|
import org.apache.jena.iri.Violation;
|
||||||
|
|
||||||
import com.hp.hpl.jena.query.QuerySolution;
|
import com.hp.hpl.jena.query.QuerySolution;
|
||||||
import com.hp.hpl.jena.query.ResultSet;
|
|
||||||
import com.hp.hpl.jena.query.Syntax;
|
import com.hp.hpl.jena.query.Syntax;
|
||||||
import com.hp.hpl.jena.rdf.model.RDFNode;
|
import com.hp.hpl.jena.rdf.model.RDFNode;
|
||||||
|
|
||||||
|
@ -58,312 +59,168 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
|
||||||
|
|
||||||
private Log log;
|
private Log log;
|
||||||
|
|
||||||
private UniqueIDGenerator nodeIDGenerator;
|
|
||||||
|
|
||||||
private UniqueIDGenerator edgeIDGenerator;
|
|
||||||
|
|
||||||
public CoAuthorshipQueryRunner(String egoURI,
|
public CoAuthorshipQueryRunner(String egoURI,
|
||||||
RDFService rdfService, Log log) {
|
RDFService rdfService, Log log) {
|
||||||
|
|
||||||
this.egoURI = egoURI;
|
this.egoURI = egoURI;
|
||||||
this.rdfService = rdfService;
|
this.rdfService = rdfService;
|
||||||
this.log = log;
|
this.log = log;
|
||||||
|
|
||||||
this.nodeIDGenerator = new UniqueIDGenerator();
|
|
||||||
this.edgeIDGenerator = new UniqueIDGenerator();
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private CollaborationData createQueryResult(ResultSet resultSet) {
|
private static class QueryResultConsumer extends ResultSetConsumer {
|
||||||
|
|
||||||
Set<Collaborator> nodes = new HashSet<Collaborator>();
|
Set<Collaborator> nodes = new HashSet<Collaborator>();
|
||||||
|
|
||||||
Map<String, Activity> biboDocumentURLToVO = new HashMap<String, Activity>();
|
Map<String, Activity> biboDocumentURLToVO = new HashMap<String, Activity>();
|
||||||
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors =
|
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors = new HashMap<String, Set<Collaborator>>();
|
||||||
new HashMap<String, Set<Collaborator>>();
|
|
||||||
Map<String, Collaborator> nodeURLToVO = new HashMap<String, Collaborator>();
|
Map<String, Collaborator> nodeURLToVO = new HashMap<String, Collaborator>();
|
||||||
Map<String, Collaboration> edgeUniqueIdentifierToVO = new HashMap<String, Collaboration>();
|
Map<String, Collaboration> edgeUniqueIdentifierToVO = new HashMap<String, Collaboration>();
|
||||||
|
|
||||||
Collaborator egoNode = null;
|
Collaborator egoNode = null;
|
||||||
|
|
||||||
Set<Collaboration> edges = new HashSet<Collaboration>();
|
Set<Collaboration> edges = new HashSet<Collaboration>();
|
||||||
|
|
||||||
while (resultSet.hasNext()) {
|
private UniqueIDGenerator nodeIDGenerator = new UniqueIDGenerator();
|
||||||
QuerySolution solution = resultSet.nextSolution();
|
private UniqueIDGenerator edgeIDGenerator = new UniqueIDGenerator();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void processQuerySolution(QuerySolution qs) {
|
||||||
/*
|
/*
|
||||||
* We only want to create only ONE ego node.
|
* We only want to create only ONE ego node.
|
||||||
* */
|
* */
|
||||||
RDFNode egoAuthorURLNode = solution.get(QueryFieldLabels.AUTHOR_URL);
|
RDFNode egoAuthorURLNode = qs.get(QueryFieldLabels.AUTHOR_URL);
|
||||||
if (nodeURLToVO.containsKey(egoAuthorURLNode.toString())) {
|
if (nodeURLToVO.containsKey(egoAuthorURLNode.toString())) {
|
||||||
|
|
||||||
egoNode = nodeURLToVO.get(egoAuthorURLNode.toString());
|
egoNode = nodeURLToVO.get(egoAuthorURLNode.toString());
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
egoNode = new Collaborator(egoAuthorURLNode.toString(), nodeIDGenerator);
|
egoNode = new Collaborator(egoAuthorURLNode.toString(), nodeIDGenerator);
|
||||||
nodes.add(egoNode);
|
nodes.add(egoNode);
|
||||||
nodeURLToVO.put(egoAuthorURLNode.toString(), egoNode);
|
nodeURLToVO.put(egoAuthorURLNode.toString(), egoNode);
|
||||||
|
|
||||||
RDFNode authorLabelNode = solution.get(QueryFieldLabels.AUTHOR_LABEL);
|
RDFNode authorLabelNode = qs.get(QueryFieldLabels.AUTHOR_LABEL);
|
||||||
if (authorLabelNode != null) {
|
if (authorLabelNode != null) {
|
||||||
egoNode.setCollaboratorName(authorLabelNode.toString());
|
egoNode.setCollaboratorName(authorLabelNode.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
RDFNode documentNode = solution.get(QueryFieldLabels.DOCUMENT_URL);
|
RDFNode documentNode = qs.get(QueryFieldLabels.DOCUMENT_URL);
|
||||||
Activity biboDocument;
|
Activity biboDocument;
|
||||||
|
|
||||||
if (biboDocumentURLToVO.containsKey(documentNode.toString())) {
|
if (biboDocumentURLToVO.containsKey(documentNode.toString())) {
|
||||||
biboDocument = biboDocumentURLToVO.get(documentNode.toString());
|
biboDocument = biboDocumentURLToVO.get(documentNode.toString());
|
||||||
} else {
|
} else {
|
||||||
biboDocument = createDocumentVO(solution, documentNode.toString());
|
biboDocument = createDocumentVO(qs, documentNode.toString());
|
||||||
biboDocumentURLToVO.put(documentNode.toString(), biboDocument);
|
biboDocumentURLToVO.put(documentNode.toString(), biboDocument);
|
||||||
}
|
}
|
||||||
|
|
||||||
egoNode.addActivity(biboDocument);
|
egoNode.addActivity(biboDocument);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* After some discussion we concluded that for the purpose of this visualization
|
* After some discussion we concluded that for the purpose of this visualization
|
||||||
* we do not want a co-author node or Collaboration if the publication has only one
|
* we do not want a co-author node or Collaboration if the publication has only one
|
||||||
* author and that happens to be the ego.
|
* author and that happens to be the ego.
|
||||||
* */
|
* */
|
||||||
if (solution.get(QueryFieldLabels.AUTHOR_URL).toString().equalsIgnoreCase(
|
if (qs.get(QueryFieldLabels.AUTHOR_URL).toString().equalsIgnoreCase(
|
||||||
solution.get(QueryFieldLabels.CO_AUTHOR_URL).toString())) {
|
qs.get(QueryFieldLabels.CO_AUTHOR_URL).toString())) {
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Collaborator coAuthorNode;
|
Collaborator coAuthorNode;
|
||||||
|
|
||||||
RDFNode coAuthorURLNode = solution.get(QueryFieldLabels.CO_AUTHOR_URL);
|
RDFNode coAuthorURLNode = qs.get(QueryFieldLabels.CO_AUTHOR_URL);
|
||||||
if (nodeURLToVO.containsKey(coAuthorURLNode.toString())) {
|
if (nodeURLToVO.containsKey(coAuthorURLNode.toString())) {
|
||||||
|
|
||||||
coAuthorNode = nodeURLToVO.get(coAuthorURLNode.toString());
|
coAuthorNode = nodeURLToVO.get(coAuthorURLNode.toString());
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
coAuthorNode = new Collaborator(coAuthorURLNode.toString(), nodeIDGenerator);
|
coAuthorNode = new Collaborator(coAuthorURLNode.toString(), nodeIDGenerator);
|
||||||
nodes.add(coAuthorNode);
|
nodes.add(coAuthorNode);
|
||||||
nodeURLToVO.put(coAuthorURLNode.toString(), coAuthorNode);
|
nodeURLToVO.put(coAuthorURLNode.toString(), coAuthorNode);
|
||||||
|
|
||||||
RDFNode coAuthorLabelNode = solution.get(QueryFieldLabels.CO_AUTHOR_LABEL);
|
RDFNode coAuthorLabelNode = qs.get(QueryFieldLabels.CO_AUTHOR_LABEL);
|
||||||
if (coAuthorLabelNode != null) {
|
if (coAuthorLabelNode != null) {
|
||||||
coAuthorNode.setCollaboratorName(coAuthorLabelNode.toString());
|
coAuthorNode.setCollaboratorName(coAuthorLabelNode.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
coAuthorNode.addActivity(biboDocument);
|
coAuthorNode.addActivity(biboDocument);
|
||||||
|
|
||||||
Set<Collaborator> coAuthorsForCurrentBiboDocument;
|
Set<Collaborator> coAuthorsForCurrentBiboDocument;
|
||||||
|
|
||||||
if (biboDocumentURLToCoAuthors.containsKey(biboDocument.getActivityURI())) {
|
if (biboDocumentURLToCoAuthors.containsKey(biboDocument.getActivityURI())) {
|
||||||
coAuthorsForCurrentBiboDocument = biboDocumentURLToCoAuthors
|
coAuthorsForCurrentBiboDocument = biboDocumentURLToCoAuthors
|
||||||
.get(biboDocument.getActivityURI());
|
.get(biboDocument.getActivityURI());
|
||||||
} else {
|
} else {
|
||||||
coAuthorsForCurrentBiboDocument = new HashSet<Collaborator>();
|
coAuthorsForCurrentBiboDocument = new HashSet<Collaborator>();
|
||||||
biboDocumentURLToCoAuthors.put(biboDocument.getActivityURI(),
|
biboDocumentURLToCoAuthors.put(biboDocument.getActivityURI(),
|
||||||
coAuthorsForCurrentBiboDocument);
|
coAuthorsForCurrentBiboDocument);
|
||||||
}
|
}
|
||||||
|
|
||||||
coAuthorsForCurrentBiboDocument.add(coAuthorNode);
|
coAuthorsForCurrentBiboDocument.add(coAuthorNode);
|
||||||
|
|
||||||
Collaboration egoCoAuthorEdge =
|
Collaboration egoCoAuthorEdge =
|
||||||
getExistingEdge(egoNode, coAuthorNode, edgeUniqueIdentifierToVO);
|
getExistingEdge(egoNode, coAuthorNode, edgeUniqueIdentifierToVO);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If "egoCoAuthorEdge" is null it means that no Collaboration exists in between the
|
* If "egoCoAuthorEdge" is null it means that no Collaboration exists in between the
|
||||||
* egoNode & current coAuthorNode. Else create a new Collaboration, add it to the edges
|
* egoNode & current coAuthorNode. Else create a new Collaboration, add it to the edges
|
||||||
* set & add the collaborator document to it.
|
* set & add the collaborator document to it.
|
||||||
* */
|
* */
|
||||||
if (egoCoAuthorEdge != null) {
|
if (egoCoAuthorEdge != null) {
|
||||||
egoCoAuthorEdge.addActivity(biboDocument);
|
egoCoAuthorEdge.addActivity(biboDocument);
|
||||||
} else {
|
} else {
|
||||||
egoCoAuthorEdge =
|
egoCoAuthorEdge =
|
||||||
new Collaboration(egoNode, coAuthorNode, biboDocument, edgeIDGenerator);
|
new Collaboration(egoNode, coAuthorNode, biboDocument, edgeIDGenerator);
|
||||||
edges.add(egoCoAuthorEdge);
|
edges.add(egoCoAuthorEdge);
|
||||||
edgeUniqueIdentifierToVO.put(
|
edgeUniqueIdentifierToVO.put(
|
||||||
getEdgeUniqueIdentifier(egoNode.getCollaboratorID(),
|
getEdgeUniqueIdentifier(egoNode.getCollaboratorID(),
|
||||||
coAuthorNode.getCollaboratorID()),
|
coAuthorNode.getCollaboratorID()),
|
||||||
egoCoAuthorEdge);
|
egoCoAuthorEdge);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This method takes out all the authors & edges between authors that belong to documents
|
|
||||||
* that have more than 100 authors. We conjecture that these papers do not provide much
|
|
||||||
* insight. However, we have left the documents be.
|
|
||||||
*
|
|
||||||
* This method side-effects "nodes" & "edges".
|
|
||||||
* */
|
|
||||||
removeLowQualityNodesAndEdges(nodes,
|
|
||||||
biboDocumentURLToVO,
|
|
||||||
biboDocumentURLToCoAuthors,
|
|
||||||
edges);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We need to create edges between 2 co-authors. E.g. On a paper there were 3 authors
|
|
||||||
* ego, A & B then we have already created edges like,
|
|
||||||
* ego - A
|
|
||||||
* ego - B
|
|
||||||
* The below sub-routine will take care of,
|
|
||||||
* A - B
|
|
||||||
*
|
|
||||||
* We are side-effecting "edges" here. The only reason to do this is because we are adding
|
|
||||||
* edges en masse for all the co-authors on all the publications considered so far. The
|
|
||||||
* other reason being we dont want to compare against 2 sets of edges (edges created before
|
|
||||||
* & co-author edges created during the course of this method) when we are creating a new
|
|
||||||
* Collaboration.
|
|
||||||
* */
|
|
||||||
createCoAuthorEdges(biboDocumentURLToVO,
|
|
||||||
biboDocumentURLToCoAuthors,
|
|
||||||
edges,
|
|
||||||
edgeUniqueIdentifierToVO);
|
|
||||||
|
|
||||||
|
|
||||||
return new CoAuthorshipData(egoNode, nodes, edges);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void removeLowQualityNodesAndEdges(
|
@Override
|
||||||
Set<Collaborator> nodes,
|
protected void endProcessing() {
|
||||||
Map<String, Activity> biboDocumentURLToVO,
|
|
||||||
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors,
|
|
||||||
Set<Collaboration> edges) {
|
|
||||||
|
|
||||||
Set<Collaborator> nodesToBeRemoved = new HashSet<Collaborator>();
|
|
||||||
for (Map.Entry<String, Set<Collaborator>> currentBiboDocumentEntry
|
|
||||||
: biboDocumentURLToCoAuthors.entrySet()) {
|
|
||||||
|
|
||||||
if (currentBiboDocumentEntry.getValue().size() > MAX_AUTHORS_PER_PAPER_ALLOWED) {
|
|
||||||
|
|
||||||
Activity currentBiboDocument = biboDocumentURLToVO
|
|
||||||
.get(currentBiboDocumentEntry.getKey());
|
|
||||||
|
|
||||||
Set<Collaboration> edgesToBeRemoved = new HashSet<Collaboration>();
|
|
||||||
|
|
||||||
for (Collaboration currentEdge : edges) {
|
|
||||||
Set<Activity> currentCollaboratorDocuments =
|
|
||||||
currentEdge.getCollaborationActivities();
|
|
||||||
|
|
||||||
if (currentCollaboratorDocuments.contains(currentBiboDocument)) {
|
|
||||||
currentCollaboratorDocuments.remove(currentBiboDocument);
|
|
||||||
if (currentCollaboratorDocuments.isEmpty()) {
|
|
||||||
edgesToBeRemoved.add(currentEdge);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
edges.removeAll(edgesToBeRemoved);
|
|
||||||
|
|
||||||
for (Collaborator currentCoAuthor : currentBiboDocumentEntry.getValue()) {
|
|
||||||
currentCoAuthor.getCollaboratorActivities().remove(currentBiboDocument);
|
|
||||||
if (currentCoAuthor.getCollaboratorActivities().isEmpty()) {
|
|
||||||
nodesToBeRemoved.add(currentCoAuthor);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
nodes.removeAll(nodesToBeRemoved);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void createCoAuthorEdges(
|
|
||||||
Map<String, Activity> biboDocumentURLToVO,
|
|
||||||
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors, Set<Collaboration> edges,
|
|
||||||
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
|
|
||||||
|
|
||||||
for (Map.Entry<String, Set<Collaborator>> currentBiboDocumentEntry
|
|
||||||
: biboDocumentURLToCoAuthors.entrySet()) {
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If there was only one co-author (other than ego) then we dont have to create any
|
|
||||||
* edges. so the below condition will take care of that.
|
|
||||||
*
|
|
||||||
* We are restricting edges between co-author if a particular document has more than
|
|
||||||
* 100 co-authors. Our conjecture is that such edges do not provide any good insight
|
|
||||||
* & causes unnecessary computations causing the server to time-out.
|
|
||||||
* */
|
|
||||||
if (currentBiboDocumentEntry.getValue().size() > 1
|
|
||||||
&& currentBiboDocumentEntry.getValue().size()
|
|
||||||
<= MAX_AUTHORS_PER_PAPER_ALLOWED) {
|
|
||||||
|
|
||||||
|
|
||||||
Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In order to leverage the nested "for loop" for making edges between all the
|
* This method takes out all the authors & edges between authors that belong to documents
|
||||||
* co-authors we need to create a list out of the set first.
|
* that have more than 100 authors. We conjecture that these papers do not provide much
|
||||||
|
* insight. However, we have left the documents be.
|
||||||
|
*
|
||||||
|
* This method side-effects "nodes" & "edges".
|
||||||
|
* */
|
||||||
|
removeLowQualityNodesAndEdges(nodes,
|
||||||
|
biboDocumentURLToVO,
|
||||||
|
biboDocumentURLToCoAuthors,
|
||||||
|
edges);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to create edges between 2 co-authors. E.g. On a paper there were 3 authors
|
||||||
|
* ego, A & B then we have already created edges like,
|
||||||
|
* ego - A
|
||||||
|
* ego - B
|
||||||
|
* The below sub-routine will take care of,
|
||||||
|
* A - B
|
||||||
|
*
|
||||||
|
* We are side-effecting "edges" here. The only reason to do this is because we are adding
|
||||||
|
* edges en masse for all the co-authors on all the publications considered so far. The
|
||||||
|
* other reason being we dont want to compare against 2 sets of edges (edges created before
|
||||||
|
* & co-author edges created during the course of this method) when we are creating a new
|
||||||
|
* Collaboration.
|
||||||
* */
|
* */
|
||||||
List<Collaborator> coAuthorNodes =
|
createCoAuthorEdges(biboDocumentURLToVO,
|
||||||
new ArrayList<Collaborator>(currentBiboDocumentEntry.getValue());
|
biboDocumentURLToCoAuthors,
|
||||||
|
edges,
|
||||||
Collections.sort(coAuthorNodes, new CollaboratorComparator());
|
edgeUniqueIdentifierToVO);
|
||||||
|
|
||||||
int numOfCoAuthors = coAuthorNodes.size();
|
|
||||||
|
|
||||||
for (int ii = 0; ii < numOfCoAuthors - 1; ii++) {
|
|
||||||
for (int jj = ii + 1; jj < numOfCoAuthors; jj++) {
|
|
||||||
|
|
||||||
Collaborator coAuthor1 = coAuthorNodes.get(ii);
|
|
||||||
Collaborator coAuthor2 = coAuthorNodes.get(jj);
|
|
||||||
|
|
||||||
Collaboration coAuthor1_2Edge = getExistingEdge(coAuthor1,
|
|
||||||
coAuthor2,
|
|
||||||
edgeUniqueIdentifierToVO);
|
|
||||||
|
|
||||||
Activity currentBiboDocument = biboDocumentURLToVO
|
|
||||||
.get(currentBiboDocumentEntry
|
|
||||||
.getKey());
|
|
||||||
|
|
||||||
if (coAuthor1_2Edge != null) {
|
|
||||||
coAuthor1_2Edge.addActivity(currentBiboDocument);
|
|
||||||
} else {
|
|
||||||
coAuthor1_2Edge = new Collaboration(coAuthor1,
|
|
||||||
coAuthor2,
|
|
||||||
currentBiboDocument,
|
|
||||||
edgeIDGenerator);
|
|
||||||
newlyAddedEdges.add(coAuthor1_2Edge);
|
|
||||||
edgeUniqueIdentifierToVO.put(
|
|
||||||
getEdgeUniqueIdentifier(coAuthor1.getCollaboratorID(),
|
|
||||||
coAuthor2.getCollaboratorID()),
|
|
||||||
coAuthor1_2Edge);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
edges.addAll(newlyAddedEdges);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
private Collaboration getExistingEdge(
|
public CollaborationData getCollaborationData() {
|
||||||
Collaborator collaboratingNode1,
|
return new CoAuthorshipData(egoNode, nodes, edges);
|
||||||
Collaborator collaboratingNode2,
|
|
||||||
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
|
|
||||||
|
|
||||||
String edgeUniqueIdentifier = getEdgeUniqueIdentifier(
|
|
||||||
collaboratingNode1.getCollaboratorID(),
|
|
||||||
collaboratingNode2.getCollaboratorID());
|
|
||||||
|
|
||||||
return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
|
|
||||||
|
|
||||||
String separator = "*";
|
|
||||||
|
|
||||||
if (nodeID1 < nodeID2) {
|
|
||||||
return nodeID1 + separator + nodeID2;
|
|
||||||
} else {
|
|
||||||
return nodeID2 + separator + nodeID1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private Activity createDocumentVO(QuerySolution solution, String documentURL) {
|
private Activity createDocumentVO(QuerySolution solution, String documentURL) {
|
||||||
|
|
||||||
Activity biboDocument = new Activity(documentURL);
|
Activity biboDocument = new Activity(documentURL);
|
||||||
|
|
||||||
|
@ -373,8 +230,145 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
|
||||||
}
|
}
|
||||||
|
|
||||||
return biboDocument;
|
return biboDocument;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
|
||||||
|
String separator = "*";
|
||||||
|
|
||||||
|
if (nodeID1 < nodeID2) {
|
||||||
|
return nodeID1 + separator + nodeID2;
|
||||||
|
} else {
|
||||||
|
return nodeID2 + separator + nodeID1;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void createCoAuthorEdges(
|
||||||
|
Map<String, Activity> biboDocumentURLToVO,
|
||||||
|
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors, Set<Collaboration> edges,
|
||||||
|
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
|
||||||
|
|
||||||
|
for (Map.Entry<String, Set<Collaborator>> currentBiboDocumentEntry
|
||||||
|
: biboDocumentURLToCoAuthors.entrySet()) {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If there was only one co-author (other than ego) then we dont have to create any
|
||||||
|
* edges. so the below condition will take care of that.
|
||||||
|
*
|
||||||
|
* We are restricting edges between co-author if a particular document has more than
|
||||||
|
* 100 co-authors. Our conjecture is that such edges do not provide any good insight
|
||||||
|
* & causes unnecessary computations causing the server to time-out.
|
||||||
|
* */
|
||||||
|
if (currentBiboDocumentEntry.getValue().size() > 1
|
||||||
|
&& currentBiboDocumentEntry.getValue().size()
|
||||||
|
<= MAX_AUTHORS_PER_PAPER_ALLOWED) {
|
||||||
|
|
||||||
|
|
||||||
|
Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In order to leverage the nested "for loop" for making edges between all the
|
||||||
|
* co-authors we need to create a list out of the set first.
|
||||||
|
* */
|
||||||
|
List<Collaborator> coAuthorNodes =
|
||||||
|
new ArrayList<Collaborator>(currentBiboDocumentEntry.getValue());
|
||||||
|
|
||||||
|
Collections.sort(coAuthorNodes, new CollaboratorComparator());
|
||||||
|
|
||||||
|
int numOfCoAuthors = coAuthorNodes.size();
|
||||||
|
|
||||||
|
for (int ii = 0; ii < numOfCoAuthors - 1; ii++) {
|
||||||
|
for (int jj = ii + 1; jj < numOfCoAuthors; jj++) {
|
||||||
|
|
||||||
|
Collaborator coAuthor1 = coAuthorNodes.get(ii);
|
||||||
|
Collaborator coAuthor2 = coAuthorNodes.get(jj);
|
||||||
|
|
||||||
|
Collaboration coAuthor1_2Edge = getExistingEdge(coAuthor1,
|
||||||
|
coAuthor2,
|
||||||
|
edgeUniqueIdentifierToVO);
|
||||||
|
|
||||||
|
Activity currentBiboDocument = biboDocumentURLToVO
|
||||||
|
.get(currentBiboDocumentEntry
|
||||||
|
.getKey());
|
||||||
|
|
||||||
|
if (coAuthor1_2Edge != null) {
|
||||||
|
coAuthor1_2Edge.addActivity(currentBiboDocument);
|
||||||
|
} else {
|
||||||
|
coAuthor1_2Edge = new Collaboration(coAuthor1,
|
||||||
|
coAuthor2,
|
||||||
|
currentBiboDocument,
|
||||||
|
edgeIDGenerator);
|
||||||
|
newlyAddedEdges.add(coAuthor1_2Edge);
|
||||||
|
edgeUniqueIdentifierToVO.put(
|
||||||
|
getEdgeUniqueIdentifier(coAuthor1.getCollaboratorID(),
|
||||||
|
coAuthor2.getCollaboratorID()),
|
||||||
|
coAuthor1_2Edge);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
edges.addAll(newlyAddedEdges);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Collaboration getExistingEdge(
|
||||||
|
Collaborator collaboratingNode1,
|
||||||
|
Collaborator collaboratingNode2,
|
||||||
|
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
|
||||||
|
|
||||||
|
String edgeUniqueIdentifier = getEdgeUniqueIdentifier(
|
||||||
|
collaboratingNode1.getCollaboratorID(),
|
||||||
|
collaboratingNode2.getCollaboratorID());
|
||||||
|
|
||||||
|
return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void removeLowQualityNodesAndEdges(
|
||||||
|
Set<Collaborator> nodes,
|
||||||
|
Map<String, Activity> biboDocumentURLToVO,
|
||||||
|
Map<String, Set<Collaborator>> biboDocumentURLToCoAuthors,
|
||||||
|
Set<Collaboration> edges) {
|
||||||
|
|
||||||
|
Set<Collaborator> nodesToBeRemoved = new HashSet<Collaborator>();
|
||||||
|
for (Map.Entry<String, Set<Collaborator>> currentBiboDocumentEntry
|
||||||
|
: biboDocumentURLToCoAuthors.entrySet()) {
|
||||||
|
|
||||||
|
if (currentBiboDocumentEntry.getValue().size() > MAX_AUTHORS_PER_PAPER_ALLOWED) {
|
||||||
|
|
||||||
|
Activity currentBiboDocument = biboDocumentURLToVO
|
||||||
|
.get(currentBiboDocumentEntry.getKey());
|
||||||
|
|
||||||
|
Set<Collaboration> edgesToBeRemoved = new HashSet<Collaboration>();
|
||||||
|
|
||||||
|
for (Collaboration currentEdge : edges) {
|
||||||
|
Set<Activity> currentCollaboratorDocuments =
|
||||||
|
currentEdge.getCollaborationActivities();
|
||||||
|
|
||||||
|
if (currentCollaboratorDocuments.contains(currentBiboDocument)) {
|
||||||
|
currentCollaboratorDocuments.remove(currentBiboDocument);
|
||||||
|
if (currentCollaboratorDocuments.isEmpty()) {
|
||||||
|
edgesToBeRemoved.add(currentEdge);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
edges.removeAll(edgesToBeRemoved);
|
||||||
|
|
||||||
|
for (Collaborator currentCoAuthor : currentBiboDocumentEntry.getValue()) {
|
||||||
|
currentCoAuthor.getCollaboratorActivities().remove(currentBiboDocument);
|
||||||
|
if (currentCoAuthor.getCollaboratorActivities().isEmpty()) {
|
||||||
|
nodesToBeRemoved.add(currentCoAuthor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nodes.removeAll(nodesToBeRemoved);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* END QUERY RUNNER */
|
||||||
}
|
}
|
||||||
|
|
||||||
private String generateEgoCoAuthorshipSparqlQuery(String queryURI) {
|
private String generateEgoCoAuthorshipSparqlQuery(String queryURI) {
|
||||||
|
|
||||||
String sparqlQuery = QueryConstants.getSparqlPrefixQuery()
|
String sparqlQuery = QueryConstants.getSparqlPrefixQuery()
|
||||||
|
@ -387,7 +381,7 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
|
||||||
+ " (str(?publicationDate) as ?"
|
+ " (str(?publicationDate) as ?"
|
||||||
+ QueryFieldLabels.DOCUMENT_PUBLICATION_DATE + ") \n"
|
+ QueryFieldLabels.DOCUMENT_PUBLICATION_DATE + ") \n"
|
||||||
+ "WHERE { \n"
|
+ "WHERE { \n"
|
||||||
+ "<" + queryURI + "> rdf:type foaf:Person ;"
|
+ "<" + queryURI + "> rdf:type foaf:Person ;"
|
||||||
+ " rdfs:label ?authorLabel ;"
|
+ " rdfs:label ?authorLabel ;"
|
||||||
+ " core:relatedBy ?authorshipNode . \n"
|
+ " core:relatedBy ?authorshipNode . \n"
|
||||||
+ "?authorshipNode rdf:type core:Authorship ;"
|
+ "?authorshipNode rdf:type core:Authorship ;"
|
||||||
|
@ -404,7 +398,7 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
|
||||||
+ "ORDER BY ?document ?coAuthorPerson\n";
|
+ "ORDER BY ?document ?coAuthorPerson\n";
|
||||||
|
|
||||||
log.debug("COAUTHORSHIP QUERY - " + sparqlQuery);
|
log.debug("COAUTHORSHIP QUERY - " + sparqlQuery);
|
||||||
|
|
||||||
return sparqlQuery;
|
return sparqlQuery;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -455,19 +449,13 @@ public class CoAuthorshipQueryRunner implements QueryRunner<CollaborationData> {
|
||||||
throw new MalformedQueryParametersException("URI parameter is either null or empty.");
|
throw new MalformedQueryParametersException("URI parameter is either null or empty.");
|
||||||
}
|
}
|
||||||
|
|
||||||
InputStream is = null;
|
|
||||||
ResultSet rs = null;
|
|
||||||
try {
|
try {
|
||||||
is = rdfService.sparqlSelectQuery(generateEgoCoAuthorshipSparqlQuery(this.egoURI), RDFService.ResultFormat.JSON);
|
QueryResultConsumer consumer = new QueryResultConsumer();
|
||||||
rs = ResultSetFactory.fromJSON(is);
|
rdfService.sparqlSelectQuery(generateEgoCoAuthorshipSparqlQuery(this.egoURI), consumer);
|
||||||
data = createQueryResult(rs);
|
data = consumer.getCollaborationData();
|
||||||
} catch (RDFServiceException e) {
|
} catch (RDFServiceException e) {
|
||||||
log.error("Unable to execute query", e);
|
log.error("Unable to execute query", e);
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
} finally {
|
|
||||||
if (is != null) {
|
|
||||||
try { is.close(); } catch (Throwable t) { }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CollaborationDataCacheEntry newEntry = new CollaborationDataCacheEntry();
|
CollaborationDataCacheEntry newEntry = new CollaborationDataCacheEntry();
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
|
|
||||||
package edu.cornell.mannlib.vitro.webapp.visualization.coprincipalinvestigator;
|
package edu.cornell.mannlib.vitro.webapp.visualization.coprincipalinvestigator;
|
||||||
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
@ -13,9 +12,9 @@ import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
import com.hp.hpl.jena.query.ResultSetFactory;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
|
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
|
||||||
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
|
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.rdfservice.ResultSetConsumer;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -23,14 +22,8 @@ import org.apache.jena.iri.IRI;
|
||||||
import org.apache.jena.iri.IRIFactory;
|
import org.apache.jena.iri.IRIFactory;
|
||||||
import org.apache.jena.iri.Violation;
|
import org.apache.jena.iri.Violation;
|
||||||
|
|
||||||
import com.hp.hpl.jena.query.Query;
|
|
||||||
import com.hp.hpl.jena.query.QueryExecution;
|
|
||||||
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
|
||||||
import com.hp.hpl.jena.query.QueryFactory;
|
|
||||||
import com.hp.hpl.jena.query.QuerySolution;
|
import com.hp.hpl.jena.query.QuerySolution;
|
||||||
import com.hp.hpl.jena.query.ResultSet;
|
|
||||||
import com.hp.hpl.jena.query.Syntax;
|
import com.hp.hpl.jena.query.Syntax;
|
||||||
import com.hp.hpl.jena.rdf.model.Model;
|
|
||||||
import com.hp.hpl.jena.rdf.model.RDFNode;
|
import com.hp.hpl.jena.rdf.model.RDFNode;
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.visualization.collaborationutils.CoInvestigationData;
|
import edu.cornell.mannlib.vitro.webapp.visualization.collaborationutils.CoInvestigationData;
|
||||||
|
@ -60,12 +53,8 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
|
||||||
|
|
||||||
private Log log = LogFactory.getLog(CoPIGrantCountQueryRunner.class.getName());
|
private Log log = LogFactory.getLog(CoPIGrantCountQueryRunner.class.getName());
|
||||||
|
|
||||||
private UniqueIDGenerator nodeIDGenerator;
|
|
||||||
|
|
||||||
private UniqueIDGenerator edgeIDGenerator;
|
|
||||||
|
|
||||||
private long before, after;
|
private long before, after;
|
||||||
|
|
||||||
private static final String SPARQL_QUERY_COMMON_OPTIONAL_BLOCK_FOR_ROLE_DATE_TIME = ""
|
private static final String SPARQL_QUERY_COMMON_OPTIONAL_BLOCK_FOR_ROLE_DATE_TIME = ""
|
||||||
+ "OPTIONAL {"
|
+ "OPTIONAL {"
|
||||||
+ " ?Role core:dateTimeInterval ?dateTimeIntervalValue . "
|
+ " ?Role core:dateTimeInterval ?dateTimeIntervalValue . "
|
||||||
|
@ -95,9 +84,6 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
|
||||||
this.egoURI = egoURI;
|
this.egoURI = egoURI;
|
||||||
this.rdfService = rdfService;
|
this.rdfService = rdfService;
|
||||||
// this.log = log;
|
// this.log = log;
|
||||||
|
|
||||||
this.nodeIDGenerator = new UniqueIDGenerator();
|
|
||||||
this.edgeIDGenerator = new UniqueIDGenerator();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private String generateEgoCoPIquery(String queryURI) {
|
private String generateEgoCoPIquery(String queryURI) {
|
||||||
|
@ -294,19 +280,13 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
|
||||||
|
|
||||||
before = System.currentTimeMillis();
|
before = System.currentTimeMillis();
|
||||||
|
|
||||||
InputStream is = null;
|
|
||||||
ResultSet rs = null;
|
|
||||||
try {
|
try {
|
||||||
is = rdfService.sparqlSelectQuery(generateEgoCoPIquery(this.egoURI), RDFService.ResultFormat.JSON);
|
QueryResultConsumer consumer = new QueryResultConsumer();
|
||||||
rs = ResultSetFactory.fromJSON(is);
|
rdfService.sparqlSelectQuery(generateEgoCoPIquery(this.egoURI), consumer);
|
||||||
data = createQueryResult(rs);
|
data = consumer.getData();
|
||||||
} catch (RDFServiceException e) {
|
} catch (RDFServiceException e) {
|
||||||
log.error("Unable to execute query", e);
|
log.error("Unable to execute query", e);
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
} finally {
|
|
||||||
if (is != null) {
|
|
||||||
try { is.close(); } catch (Throwable t) { }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
after = System.currentTimeMillis();
|
after = System.currentTimeMillis();
|
||||||
|
|
||||||
|
@ -356,34 +336,7 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class QueryResultConsumer extends ResultSetConsumer {
|
||||||
private Collaboration getExistingEdge(
|
|
||||||
Collaborator collaboratingNode1,
|
|
||||||
Collaborator collaboratingNode2,
|
|
||||||
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
|
|
||||||
|
|
||||||
String edgeUniqueIdentifier = getEdgeUniqueIdentifier(
|
|
||||||
collaboratingNode1.getCollaboratorID(),
|
|
||||||
collaboratingNode2.getCollaboratorID());
|
|
||||||
|
|
||||||
return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
|
|
||||||
|
|
||||||
String separator = "*";
|
|
||||||
|
|
||||||
if (nodeID1 < nodeID2) {
|
|
||||||
return nodeID1 + separator + nodeID2;
|
|
||||||
} else {
|
|
||||||
return nodeID2 + separator + nodeID1;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private CollaborationData createQueryResult(ResultSet resultSet) {
|
|
||||||
|
|
||||||
Set<Collaborator> nodes = new HashSet<Collaborator>();
|
Set<Collaborator> nodes = new HashSet<Collaborator>();
|
||||||
|
|
||||||
Map<String, Activity> grantURLToVO = new HashMap<String, Activity>();
|
Map<String, Activity> grantURLToVO = new HashMap<String, Activity>();
|
||||||
|
@ -394,229 +347,233 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
|
||||||
Collaborator egoNode = null;
|
Collaborator egoNode = null;
|
||||||
|
|
||||||
Set<Collaboration> edges = new HashSet<Collaboration>();
|
Set<Collaboration> edges = new HashSet<Collaboration>();
|
||||||
|
|
||||||
before = System.currentTimeMillis();
|
private UniqueIDGenerator nodeIDGenerator = new UniqueIDGenerator();
|
||||||
|
private UniqueIDGenerator edgeIDGenerator = new UniqueIDGenerator();
|
||||||
while (resultSet.hasNext()) {
|
|
||||||
QuerySolution solution = resultSet.nextSolution();
|
private Log log = LogFactory.getLog(CoPIGrantCountQueryRunner.class.getName());
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void processQuerySolution(QuerySolution qs) {
|
||||||
/*
|
/*
|
||||||
* We only want to create only ONE ego node.
|
* We only want to create only ONE ego node.
|
||||||
* */
|
* */
|
||||||
RDFNode egoPIURLNode = solution.get(QueryFieldLabels.PI_URL);
|
RDFNode egoPIURLNode = qs.get(QueryFieldLabels.PI_URL);
|
||||||
if (nodeURLToVO.containsKey(egoPIURLNode.toString())) {
|
if (nodeURLToVO.containsKey(egoPIURLNode.toString())) {
|
||||||
|
|
||||||
egoNode = nodeURLToVO.get(egoPIURLNode.toString());
|
egoNode = nodeURLToVO.get(egoPIURLNode.toString());
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
egoNode = new Collaborator(egoPIURLNode.toString(), nodeIDGenerator);
|
egoNode = new Collaborator(egoPIURLNode.toString(), nodeIDGenerator);
|
||||||
nodes.add(egoNode);
|
nodes.add(egoNode);
|
||||||
nodeURLToVO.put(egoPIURLNode.toString(), egoNode);
|
nodeURLToVO.put(egoPIURLNode.toString(), egoNode);
|
||||||
|
|
||||||
|
|
||||||
RDFNode authorLabelNode = solution.get(QueryFieldLabels.PI_LABEL);
|
RDFNode authorLabelNode = qs.get(QueryFieldLabels.PI_LABEL);
|
||||||
if (authorLabelNode != null) {
|
if (authorLabelNode != null) {
|
||||||
egoNode.setCollaboratorName(authorLabelNode.toString());
|
egoNode.setCollaboratorName(authorLabelNode.toString());
|
||||||
}
|
|
||||||
}
|
}
|
||||||
log.debug("PI: " + egoNode.getIndividualLabel());
|
}
|
||||||
|
log.debug("PI: " + egoNode.getIndividualLabel());
|
||||||
RDFNode grantNode = solution.get(QueryFieldLabels.GRANT_URL);
|
|
||||||
Activity grant;
|
RDFNode grantNode = qs.get(QueryFieldLabels.GRANT_URL);
|
||||||
|
Activity grant;
|
||||||
if (grantURLToVO.containsKey(grantNode.toString())) {
|
|
||||||
grant = grantURLToVO.get(grantNode.toString());
|
if (grantURLToVO.containsKey(grantNode.toString())) {
|
||||||
} else {
|
grant = grantURLToVO.get(grantNode.toString());
|
||||||
grant = createGrantVO(solution, grantNode.toString());
|
} else {
|
||||||
grantURLToVO.put(grantNode.toString(), grant);
|
grant = createGrantVO(qs, grantNode.toString());
|
||||||
}
|
grantURLToVO.put(grantNode.toString(), grant);
|
||||||
|
}
|
||||||
egoNode.addActivity(grant);
|
|
||||||
log.debug("Adding grant: " + grant.getIndividualLabel());
|
egoNode.addActivity(grant);
|
||||||
|
log.debug("Adding grant: " + grant.getIndividualLabel());
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* After some discussion we concluded that for the purpose of this visualization
|
* After some discussion we concluded that for the purpose of this visualization
|
||||||
* we do not want a co-pi node or edge if the grant has only one
|
* we do not want a co-pi node or edge if the grant has only one
|
||||||
* pi and that happens to be the ego.
|
* pi and that happens to be the ego.
|
||||||
* */
|
* */
|
||||||
if (solution.get(QueryFieldLabels.PI_URL).toString().equalsIgnoreCase(
|
if (qs.get(QueryFieldLabels.PI_URL).toString().equalsIgnoreCase(
|
||||||
solution.get(QueryFieldLabels.CO_PI_URL).toString())) {
|
qs.get(QueryFieldLabels.CO_PI_URL).toString())) {
|
||||||
continue;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Collaborator coPINode;
|
||||||
|
|
||||||
|
RDFNode coPIURLNode = qs.get(QueryFieldLabels.CO_PI_URL);
|
||||||
|
if (nodeURLToVO.containsKey(coPIURLNode.toString())) {
|
||||||
|
|
||||||
|
coPINode = nodeURLToVO.get(coPIURLNode.toString());
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
coPINode = new Collaborator(coPIURLNode.toString(), nodeIDGenerator);
|
||||||
|
nodes.add(coPINode);
|
||||||
|
nodeURLToVO.put(coPIURLNode.toString(), coPINode);
|
||||||
|
|
||||||
|
RDFNode coPILabelNode = qs.get(QueryFieldLabels.CO_PI_LABEL);
|
||||||
|
if (coPILabelNode != null) {
|
||||||
|
coPINode.setCollaboratorName(coPILabelNode.toString());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
Collaborator coPINode;
|
|
||||||
|
log.debug("Adding CO-PI: "+ coPINode.getIndividualLabel());
|
||||||
RDFNode coPIURLNode = solution.get(QueryFieldLabels.CO_PI_URL);
|
coPINode.addActivity(grant);
|
||||||
if (nodeURLToVO.containsKey(coPIURLNode.toString())) {
|
|
||||||
|
Set<Collaborator> coPIsForCurrentGrant;
|
||||||
coPINode = nodeURLToVO.get(coPIURLNode.toString());
|
|
||||||
|
if (grantURLToCoPIs.containsKey(grant.getActivityURI())) {
|
||||||
} else {
|
coPIsForCurrentGrant = grantURLToCoPIs.get(grant.getActivityURI());
|
||||||
|
} else {
|
||||||
coPINode = new Collaborator(coPIURLNode.toString(), nodeIDGenerator);
|
coPIsForCurrentGrant = new HashSet<Collaborator>();
|
||||||
nodes.add(coPINode);
|
grantURLToCoPIs.put(grant.getActivityURI(),
|
||||||
nodeURLToVO.put(coPIURLNode.toString(), coPINode);
|
coPIsForCurrentGrant);
|
||||||
|
}
|
||||||
RDFNode coPILabelNode = solution.get(QueryFieldLabels.CO_PI_LABEL);
|
|
||||||
if (coPILabelNode != null) {
|
coPIsForCurrentGrant.add(coPINode);
|
||||||
coPINode.setCollaboratorName(coPILabelNode.toString());
|
log.debug("Co-PI for current grant : " + coPINode.getIndividualLabel());
|
||||||
}
|
|
||||||
}
|
Collaboration egoCoPIEdge =
|
||||||
|
getExistingEdge(egoNode, coPINode, edgeUniqueIdentifierToVO);
|
||||||
log.debug("Adding CO-PI: "+ coPINode.getIndividualLabel());
|
|
||||||
coPINode.addActivity(grant);
|
|
||||||
|
|
||||||
Set<Collaborator> coPIsForCurrentGrant;
|
|
||||||
|
|
||||||
if (grantURLToCoPIs.containsKey(grant.getActivityURI())) {
|
|
||||||
coPIsForCurrentGrant = grantURLToCoPIs.get(grant.getActivityURI());
|
|
||||||
} else {
|
|
||||||
coPIsForCurrentGrant = new HashSet<Collaborator>();
|
|
||||||
grantURLToCoPIs.put(grant.getActivityURI(),
|
|
||||||
coPIsForCurrentGrant);
|
|
||||||
}
|
|
||||||
|
|
||||||
coPIsForCurrentGrant.add(coPINode);
|
|
||||||
log.debug("Co-PI for current grant : " + coPINode.getIndividualLabel());
|
|
||||||
|
|
||||||
Collaboration egoCoPIEdge =
|
|
||||||
getExistingEdge(egoNode, coPINode, edgeUniqueIdentifierToVO);
|
|
||||||
/*
|
/*
|
||||||
* If "egoCoPIEdge" is null it means that no edge exists in between the egoNode
|
* If "egoCoPIEdge" is null it means that no edge exists in between the egoNode
|
||||||
* & current coPINode. Else create a new edge, add it to the edges set & add
|
* & current coPINode. Else create a new edge, add it to the edges set & add
|
||||||
* the collaborator grant to it.
|
* the collaborator grant to it.
|
||||||
* */
|
* */
|
||||||
if (egoCoPIEdge != null) {
|
if (egoCoPIEdge != null) {
|
||||||
egoCoPIEdge.addActivity(grant);
|
egoCoPIEdge.addActivity(grant);
|
||||||
} else {
|
} else {
|
||||||
egoCoPIEdge = new Collaboration(egoNode, coPINode, grant, edgeIDGenerator);
|
egoCoPIEdge = new Collaboration(egoNode, coPINode, grant, edgeIDGenerator);
|
||||||
edges.add(egoCoPIEdge);
|
edges.add(egoCoPIEdge);
|
||||||
edgeUniqueIdentifierToVO.put(
|
edgeUniqueIdentifierToVO.put(
|
||||||
getEdgeUniqueIdentifier(egoNode.getCollaboratorID(),
|
getEdgeUniqueIdentifier(egoNode.getCollaboratorID(),
|
||||||
coPINode.getCollaboratorID()),
|
coPINode.getCollaboratorID()),
|
||||||
egoCoPIEdge);
|
egoCoPIEdge);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void endProcessing() {
|
||||||
|
super.endProcessing();
|
||||||
/*
|
/*
|
||||||
* This method takes out all the PIs & edges between PIs that belong to grants
|
* This method takes out all the PIs & edges between PIs that belong to grants
|
||||||
* that have more than 100 PIs. We conjecture that these grants do not provide much
|
* that have more than 100 PIs. We conjecture that these grants do not provide much
|
||||||
* insight. However, we have left the grants be.
|
* insight. However, we have left the grants be.
|
||||||
* This method side-effects "nodes" & "edges".
|
* This method side-effects "nodes" & "edges".
|
||||||
* */
|
* */
|
||||||
removeLowQualityNodesAndEdges(nodes,
|
removeLowQualityNodesAndEdges(nodes,
|
||||||
grantURLToVO,
|
grantURLToVO,
|
||||||
grantURLToCoPIs,
|
grantURLToCoPIs,
|
||||||
edges);
|
edges);
|
||||||
/*
|
/*
|
||||||
* We need to create edges between 2 co-PIs. E.g. On a grant there were 3 PI
|
* We need to create edges between 2 co-PIs. E.g. On a grant there were 3 PI
|
||||||
* ego, A & B then we have already created edges like,
|
* ego, A & B then we have already created edges like,
|
||||||
* ego - A
|
* ego - A
|
||||||
* ego - B
|
* ego - B
|
||||||
* The below sub-routine will take care of,
|
* The below sub-routine will take care of,
|
||||||
* A - B
|
* A - B
|
||||||
*
|
*
|
||||||
* We are side-effecting "edges" here. The only reason to do this is because we are
|
* We are side-effecting "edges" here. The only reason to do this is because we are
|
||||||
* adding edges en masse for all the co-PIs on all the grants considered so far. The
|
* adding edges en masse for all the co-PIs on all the grants considered so far. The
|
||||||
* other reason being we dont want to compare against 2 sets of edges (edges created
|
* other reason being we dont want to compare against 2 sets of edges (edges created
|
||||||
* before & co-PI edges created during the course of this method) when we are creating
|
* before & co-PI edges created during the course of this method) when we are creating
|
||||||
* a new edge.
|
* a new edge.
|
||||||
* */
|
* */
|
||||||
createCoPIEdges(grantURLToVO,
|
createCoPIEdges(grantURLToVO,
|
||||||
grantURLToCoPIs,
|
grantURLToCoPIs,
|
||||||
edges,
|
edges,
|
||||||
edgeUniqueIdentifierToVO);
|
edgeUniqueIdentifierToVO);
|
||||||
|
}
|
||||||
after = System.currentTimeMillis();
|
|
||||||
log.debug("Time taken to iterate through the ResultSet of SELECT queries is in ms: "
|
|
||||||
+ (after - before));
|
public CollaborationData getData() {
|
||||||
|
return new CoInvestigationData(egoNode, nodes, edges);
|
||||||
return new CoInvestigationData(egoNode, nodes, edges);
|
}
|
||||||
}
|
|
||||||
|
private void createCoPIEdges(Map<String, Activity> grantURLToVO,
|
||||||
|
Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges,
|
||||||
|
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
|
||||||
|
|
||||||
|
for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
|
||||||
|
: grantURLToCoPIs.entrySet()) {
|
||||||
|
|
||||||
private void createCoPIEdges(Map<String, Activity> grantURLToVO,
|
|
||||||
Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges,
|
|
||||||
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
|
|
||||||
|
|
||||||
for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
|
|
||||||
: grantURLToCoPIs.entrySet()) {
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there was only one co-PI (other than ego) then we dont have to create any
|
* If there was only one co-PI (other than ego) then we dont have to create any
|
||||||
* edges. so the below condition will take care of that.
|
* edges. so the below condition will take care of that.
|
||||||
*
|
*
|
||||||
* We are restricting edges between co-PI if a particular grant has more than
|
* We are restricting edges between co-PI if a particular grant has more than
|
||||||
* 100 co-PIs. Our conjecture is that such edges do not provide any good insight
|
* 100 co-PIs. Our conjecture is that such edges do not provide any good insight
|
||||||
* & causes unnecessary computations causing the server to time-out.
|
* & causes unnecessary computations causing the server to time-out.
|
||||||
* */
|
* */
|
||||||
if (currentGrantEntry.getValue().size() > 1
|
if (currentGrantEntry.getValue().size() > 1
|
||||||
&& currentGrantEntry.getValue().size()
|
&& currentGrantEntry.getValue().size()
|
||||||
<= MAX_PI_PER_GRANT_ALLOWED) {
|
<= MAX_PI_PER_GRANT_ALLOWED) {
|
||||||
|
|
||||||
Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
|
Set<Collaboration> newlyAddedEdges = new HashSet<Collaboration>();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In order to leverage the nested "for loop" for making edges between all the
|
* In order to leverage the nested "for loop" for making edges between all the
|
||||||
* co-PIs we need to create a list out of the set first.
|
* co-PIs we need to create a list out of the set first.
|
||||||
* */
|
* */
|
||||||
List<Collaborator> coPINodes =
|
List<Collaborator> coPINodes =
|
||||||
new ArrayList<Collaborator>(currentGrantEntry.getValue());
|
new ArrayList<Collaborator>(currentGrantEntry.getValue());
|
||||||
Collections.sort(coPINodes, new CollaboratorComparator());
|
Collections.sort(coPINodes, new CollaboratorComparator());
|
||||||
|
|
||||||
int numOfCoPIs = coPINodes.size();
|
int numOfCoPIs = coPINodes.size();
|
||||||
|
|
||||||
for (int ii = 0; ii < numOfCoPIs - 1; ii++) {
|
for (int ii = 0; ii < numOfCoPIs - 1; ii++) {
|
||||||
for (int jj = ii + 1; jj < numOfCoPIs; jj++) {
|
for (int jj = ii + 1; jj < numOfCoPIs; jj++) {
|
||||||
|
|
||||||
Collaborator coPI1 = coPINodes.get(ii);
|
Collaborator coPI1 = coPINodes.get(ii);
|
||||||
Collaborator coPI2 = coPINodes.get(jj);
|
Collaborator coPI2 = coPINodes.get(jj);
|
||||||
|
|
||||||
Collaboration coPI1_2Edge = getExistingEdge(coPI1,
|
Collaboration coPI1_2Edge = getExistingEdge(coPI1,
|
||||||
coPI2,
|
coPI2,
|
||||||
edgeUniqueIdentifierToVO);
|
edgeUniqueIdentifierToVO);
|
||||||
|
|
||||||
Activity currentGrant = grantURLToVO.get(currentGrantEntry.getKey());
|
Activity currentGrant = grantURLToVO.get(currentGrantEntry.getKey());
|
||||||
|
|
||||||
if (coPI1_2Edge != null) {
|
if (coPI1_2Edge != null) {
|
||||||
coPI1_2Edge.addActivity(currentGrant);
|
coPI1_2Edge.addActivity(currentGrant);
|
||||||
} else {
|
} else {
|
||||||
coPI1_2Edge = new Collaboration(coPI1,
|
coPI1_2Edge = new Collaboration(coPI1,
|
||||||
coPI2,
|
coPI2,
|
||||||
currentGrant,
|
currentGrant,
|
||||||
edgeIDGenerator);
|
edgeIDGenerator);
|
||||||
newlyAddedEdges.add(coPI1_2Edge);
|
newlyAddedEdges.add(coPI1_2Edge);
|
||||||
edgeUniqueIdentifierToVO.put(
|
edgeUniqueIdentifierToVO.put(
|
||||||
getEdgeUniqueIdentifier(coPI1.getCollaboratorID(),
|
getEdgeUniqueIdentifier(coPI1.getCollaboratorID(),
|
||||||
coPI2.getCollaboratorID()),
|
coPI2.getCollaboratorID()),
|
||||||
coPI1_2Edge);
|
coPI1_2Edge);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
edges.addAll(newlyAddedEdges);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
edges.addAll(newlyAddedEdges);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void removeLowQualityNodesAndEdges(Set<Collaborator> nodes,
|
private void removeLowQualityNodesAndEdges(Set<Collaborator> nodes,
|
||||||
Map<String, Activity> grantURLToVO,
|
Map<String, Activity> grantURLToVO,
|
||||||
Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges) {
|
Map<String, Set<Collaborator>> grantURLToCoPIs, Set<Collaboration> edges) {
|
||||||
|
|
||||||
Set<Collaborator> nodesToBeRemoved = new HashSet<Collaborator>();
|
Set<Collaborator> nodesToBeRemoved = new HashSet<Collaborator>();
|
||||||
for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
|
for (Map.Entry<String, Set<Collaborator>> currentGrantEntry
|
||||||
: grantURLToCoPIs.entrySet()) {
|
: grantURLToCoPIs.entrySet()) {
|
||||||
|
|
||||||
if (currentGrantEntry.getValue().size() > MAX_PI_PER_GRANT_ALLOWED) {
|
if (currentGrantEntry.getValue().size() > MAX_PI_PER_GRANT_ALLOWED) {
|
||||||
|
|
||||||
Activity currentGrant = grantURLToVO.get(currentGrantEntry.getKey());
|
Activity currentGrant = grantURLToVO.get(currentGrantEntry.getKey());
|
||||||
|
|
||||||
Set<Collaboration> edgesToBeRemoved = new HashSet<Collaboration>();
|
Set<Collaboration> edgesToBeRemoved = new HashSet<Collaboration>();
|
||||||
|
|
||||||
for (Collaboration currentEdge : edges) {
|
for (Collaboration currentEdge : edges) {
|
||||||
Set<Activity> currentCollaboratorGrants =
|
Set<Activity> currentCollaboratorGrants =
|
||||||
currentEdge.getCollaborationActivities();
|
currentEdge.getCollaborationActivities();
|
||||||
|
|
||||||
if (currentCollaboratorGrants.contains(currentGrant)) {
|
if (currentCollaboratorGrants.contains(currentGrant)) {
|
||||||
currentCollaboratorGrants.remove(currentGrant);
|
currentCollaboratorGrants.remove(currentGrant);
|
||||||
if (currentCollaboratorGrants.isEmpty()) {
|
if (currentCollaboratorGrants.isEmpty()) {
|
||||||
|
@ -624,7 +581,7 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
edges.removeAll(edgesToBeRemoved);
|
edges.removeAll(edgesToBeRemoved);
|
||||||
|
|
||||||
for (Collaborator currentCoPI : currentGrantEntry.getValue()) {
|
for (Collaborator currentCoPI : currentGrantEntry.getValue()) {
|
||||||
|
@ -634,14 +591,14 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nodes.removeAll(nodesToBeRemoved);
|
nodes.removeAll(nodesToBeRemoved);
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private Activity createGrantVO(QuerySolution solution, String grantURL) {
|
}
|
||||||
|
|
||||||
Activity grant = new Activity(grantURL);
|
private Activity createGrantVO(QuerySolution solution, String grantURL) {
|
||||||
|
|
||||||
|
Activity grant = new Activity(grantURL);
|
||||||
|
|
||||||
// RDFNode grantLabelNode = solution.get(QueryFieldLabels.GRANT_LABEL);
|
// RDFNode grantLabelNode = solution.get(QueryFieldLabels.GRANT_LABEL);
|
||||||
// if (grantLabelNode != null) {
|
// if (grantLabelNode != null) {
|
||||||
|
@ -649,17 +606,17 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
|
||||||
RDFNode grantStartYear = solution.get(QueryFieldLabels.ROLE_START_DATE);
|
RDFNode grantStartYear = solution.get(QueryFieldLabels.ROLE_START_DATE);
|
||||||
if (grantStartYear != null) {
|
|
||||||
grant.setActivityDate(grantStartYear.toString());
|
|
||||||
} else {
|
|
||||||
grantStartYear = solution.get(QueryFieldLabels.GRANT_START_DATE);
|
|
||||||
if (grantStartYear != null) {
|
if (grantStartYear != null) {
|
||||||
grant.setActivityDate(grantStartYear.toString());
|
grant.setActivityDate(grantStartYear.toString());
|
||||||
}
|
} else {
|
||||||
}
|
grantStartYear = solution.get(QueryFieldLabels.GRANT_START_DATE);
|
||||||
|
if (grantStartYear != null) {
|
||||||
//TODO: Verify that grant end date is not required.
|
grant.setActivityDate(grantStartYear.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//TODO: Verify that grant end date is not required.
|
||||||
/*
|
/*
|
||||||
RDFNode grantEndDate = solution.get(QueryFieldLabels.ROLE_END_DATE);
|
RDFNode grantEndDate = solution.get(QueryFieldLabels.ROLE_END_DATE);
|
||||||
if (grantEndDate != null) {
|
if (grantEndDate != null) {
|
||||||
|
@ -668,10 +625,37 @@ public class CoPIGrantCountQueryRunner implements QueryRunner<CollaborationData>
|
||||||
grantEndDate = solution.get(QueryFieldLabels.GRANT_END_DATE);
|
grantEndDate = solution.get(QueryFieldLabels.GRANT_END_DATE);
|
||||||
if(grantEndDate != null){
|
if(grantEndDate != null){
|
||||||
grant.setGrantEndDate(grantEndDate.toString());
|
grant.setGrantEndDate(grantEndDate.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
return grant;
|
return grant;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Collaboration getExistingEdge(
|
||||||
|
Collaborator collaboratingNode1,
|
||||||
|
Collaborator collaboratingNode2,
|
||||||
|
Map<String, Collaboration> edgeUniqueIdentifierToVO) {
|
||||||
|
|
||||||
|
String edgeUniqueIdentifier = getEdgeUniqueIdentifier(
|
||||||
|
collaboratingNode1.getCollaboratorID(),
|
||||||
|
collaboratingNode2.getCollaboratorID());
|
||||||
|
|
||||||
|
return edgeUniqueIdentifierToVO.get(edgeUniqueIdentifier);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getEdgeUniqueIdentifier(int nodeID1, int nodeID2) {
|
||||||
|
|
||||||
|
String separator = "*";
|
||||||
|
|
||||||
|
if (nodeID1 < nodeID2) {
|
||||||
|
return nodeID1 + separator + nodeID2;
|
||||||
|
} else {
|
||||||
|
return nodeID2 + separator + nodeID1;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
/** END QUERY RESULT CONSUMER **/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue