Replace old, undistributed CSV parser library with Commons CSV

This commit is contained in:
Graham Triggs 2016-12-19 22:24:12 +00:00
parent 47a18844c4
commit 45bad8174d
4 changed files with 83 additions and 80 deletions

View file

@ -141,7 +141,7 @@ public class JenaCsv2RdfController extends JenaIngestController {
}
public Model doExecuteCsv2Rdf(VitroRequest vreq, FileItem fileStream, String filePath) throws Exception {
char[] quoteChars = {'"'};
char quoteChar = '"';
String namespace = "";
String tboxNamespace = vreq.getParameter("tboxNamespace");
String typeName = vreq.getParameter("typeName");
@ -162,7 +162,7 @@ public class JenaCsv2RdfController extends JenaIngestController {
separatorChar = '\t';
}
Csv2Rdf c2r = new Csv2Rdf(separatorChar, quoteChars,namespace,tboxNamespace,typeName);
Csv2Rdf c2r = new Csv2Rdf(separatorChar, quoteChar,namespace,tboxNamespace,typeName);
InputStream is = null;

View file

@ -5,14 +5,16 @@ package edu.cornell.mannlib.vitro.webapp.ontology.update;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.skife.csv.CSVReader;
import org.skife.csv.SimpleReader;
import edu.cornell.mannlib.vitro.webapp.ontology.update.AtomicOntologyChange.AtomicChangeType;
@ -35,7 +37,6 @@ public class OntologyChangeParser {
/**
* @param diffPath Diff path
* @throws IOException
*/
@SuppressWarnings({ "unchecked", "null", "static-access" })
@ -50,52 +51,53 @@ public class OntologyChangeParser {
String destinationURI = null;
StringTokenizer stArr = null;
FileInputStream in = new FileInputStream(new File(diffPath));
CSVReader readFile = new SimpleReader();
readFile.setSeperator('\t');
List<String[]> rows = readFile.parse(in);
for(int rowNum = 0; rowNum < rows.size(); rowNum++){
String[] cols = rows.get(rowNum);
if (cols.length != 5) {
logger.logError("Invalid PromptDiff data at row " + (rowNum + 1)
+ ". Expected 5 columns; found " + cols.length );
// The diff rows are tab-delimited, so the tab has to be set as the field delimiter.
// withRecordSeparator() only influences printing and is ignored while parsing, which
// would leave ',' as the delimiter and make every row fail the 5-column check below.
CSVParser readFile = new CSVParser(new InputStreamReader(in),
        CSVFormat.DEFAULT.withDelimiter('\t'));
int rowNum = 0;
for (CSVRecord record : readFile) {
rowNum++;
if (record.size() != 5) {
logger.logError("Invalid PromptDiff data at row " + (rowNum)
+ ". Expected 5 columns; found " + record.size() );
} else {
String col = null;
changeObj = new AtomicOntologyChange();
if (cols[0] != null && cols[0].length() > 0) {
changeObj.setSourceURI(cols[0]);
}
if (cols[1] != null && cols[1].length() > 0) {
changeObj.setDestinationURI(cols[1]);
col = record.get(0);
if (col != null && col.length() > 0) {
changeObj.setSourceURI(col);
}
if (cols[4] != null && cols[4].length() > 0) {
changeObj.setNotes(cols[4]);
col = record.get(1);
if (col != null && col.length() > 0) {
changeObj.setDestinationURI(col);
}
col = record.get(4);
if (col != null && col.length() > 0) {
changeObj.setNotes(col);
}
if ("Yes".equals(cols[2])) {
if ("Yes".equals(record.get(2))) {
changeObj.setAtomicChangeType(AtomicChangeType.RENAME);
} else if ("Delete".equals(cols[3])) {
} else if ("Delete".equals(record.get(3))) {
changeObj.setAtomicChangeType(AtomicChangeType.DELETE);
} else if ("Add".equals(cols[3])) {
} else if ("Add".equals(record.get(3))) {
changeObj.setAtomicChangeType(AtomicChangeType.ADD);
} else {
logger.logError("Invalid rename or change type data: '" +
cols[2] + " " + cols[3] + "'");
record.get(2) + " " + record.get(3) + "'");
}
log.debug(changeObj);
changeObjects.add(changeObj);
}
}
readFile.close();
if (changeObjects.size() == 0) {
logger.log("No ABox updates are required.");

View file

@ -4,14 +4,15 @@ package edu.cornell.mannlib.vitro.webapp.utils;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Random;
import javax.servlet.http.HttpServletRequest;
import org.skife.csv.CSVReader;
import org.skife.csv.SimpleReader;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.ontology.DatatypeProperty;
import org.apache.jena.ontology.Individual;
@ -34,11 +35,11 @@ public class Csv2Rdf {
private String individualNameBase;
private String propertyNameBase;
private char separatorChar;
private char[] quoteChars;
private char quoteChar;
public Csv2Rdf(char[] quoteChars, String namespace, String tboxNamespace, String typeName) {
public Csv2Rdf(char quoteChar, String namespace, String tboxNamespace, String typeName) {
this.separatorChar = ',';
this.quoteChars = quoteChars;
this.quoteChar = quoteChar;
this.namespace = namespace;
this.tboxNamespace = tboxNamespace;
this.typeName = typeName;
@ -46,9 +47,9 @@ public class Csv2Rdf {
this.propertyNameBase = individualNameBase+"_";
}
public Csv2Rdf(char separatorChar, char[] quoteChars, String namespace, String tboxNamespace, String typeName) {
public Csv2Rdf(char separatorChar, char quoteChar, String namespace, String tboxNamespace, String typeName) {
this.separatorChar = separatorChar;
this.quoteChars = quoteChars;
this.quoteChar = quoteChar;
this.namespace = namespace;
this.tboxNamespace = tboxNamespace;
this.typeName = typeName;
@ -66,47 +67,48 @@ public class Csv2Rdf {
ontModel.addSubModel(tboxOntModel);
OntClass theClass = tboxOntModel.createClass(tboxNamespace+typeName);
CSVReader cReader = new SimpleReader();
cReader.setSeperator(separatorChar);
cReader.setQuoteCharacters(quoteChars);
URIGenerator uriGen = (wadf != null && destination != null)
URIGenerator uriGen = (wadf != null && destination != null)
? new RandomURIGenerator(wadf, destination)
: new SequentialURIGenerator();
List<String[]> fileRows = cReader.parse(fis);
String[] columnHeaders = fileRows.get(0);
: new SequentialURIGenerator();
DatatypeProperty[] dpArray = new DatatypeProperty[columnHeaders.length];
// separatorChar is the character between fields, so it is the parser's delimiter.
// withRecordSeparator() is only honoured when printing and is ignored while parsing;
// using it here would leave ',' as the delimiter regardless of separatorChar.
CSVParser cReader = new CSVParser(new InputStreamReader(fis),
        CSVFormat.DEFAULT.withDelimiter(separatorChar)
                .withQuote(quoteChar));
for (int i=0; i<columnHeaders.length; i++) {
dpArray[i] = tboxOntModel.createDatatypeProperty(tboxNamespace+propertyNameBase+columnHeaders[i].replaceAll("\\W",""));
}
Individual ind = null;
for (int row=1; row<fileRows.size(); row++) {
String uri = uriGen.getNextURI();
if(uri!=null)
ind = ontModel.createIndividual(uri,theClass);
else
ind = ontModel.createIndividual(theClass);
String[] cols = fileRows.get(row);
for (int col=0; col<cols.length; col++) {
String value = cols[col].trim();
if (value.length()>0) {
ind.addProperty(dpArray[col], value); // no longer using: , XSDDatatype.XSDstring);
// TODO: specification of datatypes for columns
}
}
}
DatatypeProperty[] dpArray = null;
for (CSVRecord cRecord : cReader) {
if (dpArray == null) {
dpArray = new DatatypeProperty[cRecord.size()];
for (int i = 0; i < dpArray.length; i++) {
dpArray[i] = tboxOntModel.createDatatypeProperty(tboxNamespace+propertyNameBase+cRecord.get(i).replaceAll("\\W",""));
}
} else {
Individual ind = null;
String uri = uriGen.getNextURI();
if (uri!=null) {
ind = ontModel.createIndividual(uri, theClass);
} else {
ind = ontModel.createIndividual(theClass);
}
for (int col = 0; col<cRecord.size() && col < dpArray.length; col++) {
String value = cRecord.get(col).trim();
if (value.length()>0) {
ind.addProperty(dpArray[col], value); // no longer using: , XSDDatatype.XSDstring);
// TODO: specification of datatypes for columns
}
}
}
}
cReader.close();
ontModel.removeSubModel(tboxOntModel);
Model[] resultModels = new Model[2];
resultModels[0] = ontModel;
resultModels[1] = tboxOntModel;
return resultModels;
}
private interface URIGenerator {

11
dependencies/pom.xml vendored
View file

@ -111,6 +111,11 @@
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
@ -243,12 +248,6 @@
<version>1.2.1</version>
</dependency>
<dependency>
<groupId>org.vivoweb.dependencies</groupId>
<artifactId>csv</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>javax.json</groupId>
<artifactId>javax.json-api</artifactId>