Replace old, undistributed CSV parser library with Commons CSV

This commit is contained in:
Graham Triggs 2016-12-19 22:24:12 +00:00
parent 47a18844c4
commit 45bad8174d
4 changed files with 83 additions and 80 deletions

View file

@ -141,7 +141,7 @@ public class JenaCsv2RdfController extends JenaIngestController {
} }
public Model doExecuteCsv2Rdf(VitroRequest vreq, FileItem fileStream, String filePath) throws Exception { public Model doExecuteCsv2Rdf(VitroRequest vreq, FileItem fileStream, String filePath) throws Exception {
char[] quoteChars = {'"'}; char quoteChar = '"';
String namespace = ""; String namespace = "";
String tboxNamespace = vreq.getParameter("tboxNamespace"); String tboxNamespace = vreq.getParameter("tboxNamespace");
String typeName = vreq.getParameter("typeName"); String typeName = vreq.getParameter("typeName");
@ -162,7 +162,7 @@ public class JenaCsv2RdfController extends JenaIngestController {
separatorChar = '\t'; separatorChar = '\t';
} }
Csv2Rdf c2r = new Csv2Rdf(separatorChar, quoteChars,namespace,tboxNamespace,typeName); Csv2Rdf c2r = new Csv2Rdf(separatorChar, quoteChar,namespace,tboxNamespace,typeName);
InputStream is = null; InputStream is = null;

View file

@ -5,14 +5,16 @@ package edu.cornell.mannlib.vitro.webapp.ontology.update;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.StringTokenizer; import java.util.StringTokenizer;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.skife.csv.CSVReader;
import org.skife.csv.SimpleReader;
import edu.cornell.mannlib.vitro.webapp.ontology.update.AtomicOntologyChange.AtomicChangeType; import edu.cornell.mannlib.vitro.webapp.ontology.update.AtomicOntologyChange.AtomicChangeType;
@ -35,7 +37,6 @@ public class OntologyChangeParser {
/** /**
* @param diffPath Diff path * @param diffPath Diff path
* @throws IOException
*/ */
@SuppressWarnings({ "unchecked", "null", "static-access" }) @SuppressWarnings({ "unchecked", "null", "static-access" })
@ -50,52 +51,53 @@ public class OntologyChangeParser {
String destinationURI = null; String destinationURI = null;
StringTokenizer stArr = null; StringTokenizer stArr = null;
FileInputStream in = new FileInputStream(new File(diffPath)); FileInputStream in = new FileInputStream(new File(diffPath));
CSVReader readFile = new SimpleReader();
readFile.setSeperator('\t'); CSVParser readFile = new CSVParser(new InputStreamReader(in),
CSVFormat.DEFAULT.withRecordSeparator('\t'));
List<String[]> rows = readFile.parse(in);
int rowNum = 0;
for(int rowNum = 0; rowNum < rows.size(); rowNum++){ for (CSVRecord record : readFile) {
rowNum++;
String[] cols = rows.get(rowNum); if (record.size() != 5) {
if (cols.length != 5) { logger.logError("Invalid PromptDiff data at row " + (rowNum)
logger.logError("Invalid PromptDiff data at row " + (rowNum + 1) + ". Expected 5 columns; found " + record.size() );
+ ". Expected 5 columns; found " + cols.length );
} else { } else {
String col = null;
changeObj = new AtomicOntologyChange(); changeObj = new AtomicOntologyChange();
if (cols[0] != null && cols[0].length() > 0) { col = record.get(0);
changeObj.setSourceURI(cols[0]); if (col != null && col.length() > 0) {
} changeObj.setSourceURI(col);
if (cols[1] != null && cols[1].length() > 0) {
changeObj.setDestinationURI(cols[1]);
} }
if (cols[4] != null && cols[4].length() > 0) { col = record.get(1);
changeObj.setNotes(cols[4]); if (col != null && col.length() > 0) {
changeObj.setDestinationURI(col);
}
col = record.get(4);
if (col != null && col.length() > 0) {
changeObj.setNotes(col);
} }
if ("Yes".equals(cols[2])) { if ("Yes".equals(record.get(2))) {
changeObj.setAtomicChangeType(AtomicChangeType.RENAME); changeObj.setAtomicChangeType(AtomicChangeType.RENAME);
} else if ("Delete".equals(cols[3])) { } else if ("Delete".equals(record.get(3))) {
changeObj.setAtomicChangeType(AtomicChangeType.DELETE); changeObj.setAtomicChangeType(AtomicChangeType.DELETE);
} else if ("Add".equals(cols[3])) { } else if ("Add".equals(record.get(3))) {
changeObj.setAtomicChangeType(AtomicChangeType.ADD); changeObj.setAtomicChangeType(AtomicChangeType.ADD);
} else { } else {
logger.logError("Invalid rename or change type data: '" + logger.logError("Invalid rename or change type data: '" +
cols[2] + " " + cols[3] + "'"); record.get(2) + " " + record.get(3) + "'");
} }
log.debug(changeObj); log.debug(changeObj);
changeObjects.add(changeObj); changeObjects.add(changeObj);
} }
} }
readFile.close();
if (changeObjects.size() == 0) { if (changeObjects.size() == 0) {
logger.log("No ABox updates are required."); logger.log("No ABox updates are required.");

View file

@ -4,14 +4,15 @@ package edu.cornell.mannlib.vitro.webapp.utils;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List; import java.util.List;
import java.util.Random; import java.util.Random;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import org.skife.csv.CSVReader; import org.apache.commons.csv.CSVFormat;
import org.skife.csv.SimpleReader; import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.jena.datatypes.xsd.XSDDatatype; import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.ontology.DatatypeProperty; import org.apache.jena.ontology.DatatypeProperty;
import org.apache.jena.ontology.Individual; import org.apache.jena.ontology.Individual;
@ -34,11 +35,11 @@ public class Csv2Rdf {
private String individualNameBase; private String individualNameBase;
private String propertyNameBase; private String propertyNameBase;
private char separatorChar; private char separatorChar;
private char[] quoteChars; private char quoteChar;
public Csv2Rdf(char[] quoteChars, String namespace, String tboxNamespace, String typeName) { public Csv2Rdf(char quoteChar, String namespace, String tboxNamespace, String typeName) {
this.separatorChar = ','; this.separatorChar = ',';
this.quoteChars = quoteChars; this.quoteChar = quoteChar;
this.namespace = namespace; this.namespace = namespace;
this.tboxNamespace = tboxNamespace; this.tboxNamespace = tboxNamespace;
this.typeName = typeName; this.typeName = typeName;
@ -46,9 +47,9 @@ public class Csv2Rdf {
this.propertyNameBase = individualNameBase+"_"; this.propertyNameBase = individualNameBase+"_";
} }
public Csv2Rdf(char separatorChar, char[] quoteChars, String namespace, String tboxNamespace, String typeName) { public Csv2Rdf(char separatorChar, char quoteChar, String namespace, String tboxNamespace, String typeName) {
this.separatorChar = separatorChar; this.separatorChar = separatorChar;
this.quoteChars = quoteChars; this.quoteChar = quoteChar;
this.namespace = namespace; this.namespace = namespace;
this.tboxNamespace = tboxNamespace; this.tboxNamespace = tboxNamespace;
this.typeName = typeName; this.typeName = typeName;
@ -66,47 +67,48 @@ public class Csv2Rdf {
ontModel.addSubModel(tboxOntModel); ontModel.addSubModel(tboxOntModel);
OntClass theClass = tboxOntModel.createClass(tboxNamespace+typeName); OntClass theClass = tboxOntModel.createClass(tboxNamespace+typeName);
CSVReader cReader = new SimpleReader(); URIGenerator uriGen = (wadf != null && destination != null)
cReader.setSeperator(separatorChar);
cReader.setQuoteCharacters(quoteChars);
URIGenerator uriGen = (wadf != null && destination != null)
? new RandomURIGenerator(wadf, destination) ? new RandomURIGenerator(wadf, destination)
: new SequentialURIGenerator(); : new SequentialURIGenerator();
List<String[]> fileRows = cReader.parse(fis);
String[] columnHeaders = fileRows.get(0);
DatatypeProperty[] dpArray = new DatatypeProperty[columnHeaders.length]; CSVParser cReader = new CSVParser(new InputStreamReader(fis),
CSVFormat.DEFAULT.withRecordSeparator(separatorChar)
.withQuote(quoteChar));
for (int i=0; i<columnHeaders.length; i++) { DatatypeProperty[] dpArray = null;
dpArray[i] = tboxOntModel.createDatatypeProperty(tboxNamespace+propertyNameBase+columnHeaders[i].replaceAll("\\W",""));
} for (CSVRecord cRecord : cReader) {
Individual ind = null; if (dpArray == null) {
for (int row=1; row<fileRows.size(); row++) { dpArray = new DatatypeProperty[cRecord.size()];
String uri = uriGen.getNextURI();
if(uri!=null) for (int i = 0; i < dpArray.length; i++) {
ind = ontModel.createIndividual(uri,theClass); dpArray[i] = tboxOntModel.createDatatypeProperty(tboxNamespace+propertyNameBase+cRecord.get(i).replaceAll("\\W",""));
else }
ind = ontModel.createIndividual(theClass); } else {
String[] cols = fileRows.get(row); Individual ind = null;
for (int col=0; col<cols.length; col++) { String uri = uriGen.getNextURI();
String value = cols[col].trim(); if (uri!=null) {
if (value.length()>0) { ind = ontModel.createIndividual(uri, theClass);
ind.addProperty(dpArray[col], value); // no longer using: , XSDDatatype.XSDstring); } else {
// TODO: specification of datatypes for columns ind = ontModel.createIndividual(theClass);
} }
} for (int col = 0; col<cRecord.size() && col < dpArray.length; col++) {
} String value = cRecord.get(col).trim();
if (value.length()>0) {
ind.addProperty(dpArray[col], value); // no longer using: , XSDDatatype.XSDstring);
// TODO: specification of datatypes for columns
}
}
}
}
cReader.close();
ontModel.removeSubModel(tboxOntModel); ontModel.removeSubModel(tboxOntModel);
Model[] resultModels = new Model[2]; Model[] resultModels = new Model[2];
resultModels[0] = ontModel; resultModels[0] = ontModel;
resultModels[1] = tboxOntModel; resultModels[1] = tboxOntModel;
return resultModels; return resultModels;
} }
private interface URIGenerator { private interface URIGenerator {

11
dependencies/pom.xml vendored
View file

@ -111,6 +111,11 @@
<artifactId>commons-lang3</artifactId> <artifactId>commons-lang3</artifactId>
<version>3.4</version> <version>3.4</version>
</dependency> </dependency>
<!-- Apache Commons CSV: provides the CSVFormat / CSVParser / CSVRecord classes used for CSV parsing -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.4</version>
</dependency>
<dependency> <dependency>
<groupId>commons-logging</groupId> <groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId> <artifactId>commons-logging</artifactId>
@ -243,12 +248,6 @@
<version>1.2.1</version> <version>1.2.1</version>
</dependency> </dependency>
<dependency>
<groupId>org.vivoweb.dependencies</groupId>
<artifactId>csv</artifactId>
<version>1.0</version>
</dependency>
<dependency> <dependency>
<groupId>javax.json</groupId> <groupId>javax.json</groupId>
<artifactId>javax.json-api</artifactId> <artifactId>javax.json-api</artifactId>